Jean Fromentin il y a 6 ans
commit
da3eb62b8f
12 fichiers modifiés avec 1602 ajouts et 0 suppressions
  1. 49 0
      Makefile
  2. 17 0
      README.md
  3. 29 0
      monoid.cpp
  4. 293 0
      monoid.hpp
  5. 27 0
      monoid.pxd
  6. 64 0
      mred.sage
  7. 39 0
      numeric_monoid.pxd
  8. 716 0
      numeric_monoid.pyx
  9. 63 0
      setup.py
  10. 223 0
      treewalk.cpp
  11. 52 0
      treewalk.hpp
  12. 30 0
      treewalk.pxd

+ 49 - 0
Makefile

@@ -0,0 +1,49 @@
+# CILK_ROOT must contains the GCC/Cilk root directory
+OS	 		= $(shell uname)
+CPPFLAGS    = -DNDEBUG -DMAX_GENUS=$(MAX_GENUS) -DQUOTIENT=$(QUOTIENT)
+CXXFLAGS    = -std=c++11 -fcilkplus -g -Wall -O3 # -fsanitize=thread # -Winline
+
+ifeq ($(shell uname), Darwin)
+TARGET_ARCH = -march=corei7 -march=corei7
+LDFLAGS 	= -lcilkrts
+CXX		= g++-6
+else
+TARGET_ARCH = -march=corei7 -mtune=corei7
+LDFLAGS     = # -Wl,-rpath=$(CILK_ROOT)/lib64
+LDLIBS      = -lcilkrts # -ltsan
+endif
+
+TARGET = treewalk
+
+SAGE = sage
+PYTHON = $(SAGE) -python
+
+# Experimental TBB stuff
+# Suppose that TBB is installed in $(TBB_ROOT)
+ifdef USE_TBB
+CPPFLAGS += -DTBB=1 -I$(TBB_ROOT)/include
+LDFLAGS +=-Wl,-rpath=$(TBB_ROOT)/lib/intel64/gcc4.4 -L$(TBB_ROOT)/lib/intel64/gcc4.4 -ltbbmalloc
+endif
+
+# Pour compiler avec une valeur différente: make MAX_GENUS=35
+DEFAULT_MAX_GENUS=40
+MAX_GENUS=$(DEFAULT_MAX_GENUS)
+DEFAULT_QUOTIENT=3
+QUOTIENT=$(DEFAULT_QUOTIENT)
+
+all: $(TARGET)
+
+monoid.o: monoid.cpp monoid.hpp
+treewalk.o: treewalk.cpp treewalk.hpp monoid.hpp
+treewalk: treewalk.o monoid.o
+	$(CXX) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@
+
+numeric_monoid.so: treewalk.cpp treewalk.hpp monoid.cpp monoid.hpp numeric_monoid.pxd numeric_monoid.pyx monoid.pxd treewalk.pxd
+	$(PYTHON) setup.py build_ext --inplace
+
+clean:
+	rm -rf $(TARGET) *.o build cysignals numeric_monoid.so numeric_monoid.html numeric_monoid.cpp
+
+test: all
+	./treewalk
+	$(SAGE) -t --force-lib numeric_monoid.pyx

+ 17 - 0
README.md

@@ -0,0 +1,17 @@
+# NumericMonoid: Cilk++ version
+
+This is the last and definitive version of NumericMonoid.
+
+It requires GCC 5 and (not mandatory) [Sage](http://www.sagemath.org/).
+
+## Building
+
+If you have multiple versions of GCC installed on your system, you have to specify it to the `make` command:
+```
+CXX=g++-5 make MAX_GENUS=35
+```
+
+If you only have GCC 5 installed and wish to use the default `MAX_GENUS` (40), simply launch:
+```
+make
+```

+ 29 - 0
monoid.cpp

@@ -0,0 +1,29 @@
+#include <iostream>
+#include "monoid.hpp"
+
+void init_full_N(monoid &m)
+{
+  epi8 block ={1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8};
+  for(auto i=0; i<NBLOCKS; i++){
+    m.blocks[i] = block;
+    block = block + 8;
+  }
+  m.genus = 0;
+  m.conductor = 1;
+  m.min = 1;
+}
+
+void print_monoid(const monoid &m)
+{
+  unsigned int i;
+  std::cout<<"min = "<<m.min<<", cond = "<<m.conductor<<", genus = "<<m.genus<<", decs = ";
+  for (i=0; i<SIZE; i++) std::cout<<((int) m.decs[i])<<' ';
+  std::cout<<std::endl;
+}
+
+void print_epi8(epi8 bl)
+{
+  unsigned int i;
+  for (i=0; i<16; i++) std::cout<<((uint8_t*)&bl)[i]<<' ';
+  std::cout<<std::endl;
+}

+ 293 - 0
monoid.hpp

@@ -0,0 +1,293 @@
+#ifndef MONOID_HPP
+#define MONOID_HPP
+
+#include <cstdint>
+#include <cmath>
+
+// We cant have those as C++ constant because of the #define used below in
+// remove_generator manual loop unrolling. I don't know if the unrolling is
+// doable using template metaprogamming. I didn't manage to figure out how.
+
+#ifndef MAX_GENUS
+#error "Please define the MAX_GENUS macro"
+#endif
+#define SIZE_BOUND (3*(MAX_GENUS-1))
+#define NBLOCKS ((SIZE_BOUND+15) >> 4)
+#define SIZE (NBLOCKS << 4)
+// const uint_fast8_t MAX_GENUS = 40;
+// const uint_fast8_t SIZE_BOUND = (3*(MAX_GENUS-1));
+// const uint_fast8_t NBLOCKS = ((SIZE_BOUND+15) >> 4);
+// const uint_fast8_t SIZE = (NBLOCKS << 4);
+
+#include <x86intrin.h>
+
+typedef uint8_t epi8 __attribute__ ((vector_size (16)));
+typedef uint8_t dec_numbers[SIZE] __attribute__ ((aligned (16)));
+typedef epi8 dec_blocks[NBLOCKS];
+typedef uint_fast64_t ind_t;  // The type used for array indexes
+
+struct monoid
+{
+  union {
+    dec_numbers decs;
+    dec_blocks blocks;
+  };
+  // Dont use char as they have to be promoted to 64 bits to do pointer arithmetic.
+  ind_t conductor, min, genus;
+  int quotient() const {return ceil(float(conductor)/float(min));};
+};
+
+void init_full_N(monoid &);
+void print_monoid(const monoid &);
+void print_epi8(epi8);
+inline void copy_blocks(      dec_blocks &__restrict__ dst,
+			const dec_blocks &__restrict__ src);
+inline void remove_generator(monoid &__restrict__ dst,
+		      const monoid &__restrict__ src,
+		      ind_t gen);
+inline monoid remove_generator(const monoid &src, ind_t gen);
+
+
+// typedef enum { ALL, CHILDREN } generator_type;
+class ALL {};
+class CHILDREN {};
+
+// template <generator_type T> class generator_iter
+template <class T> class generator_iter
+{
+private:
+
+  const monoid &m;
+  unsigned int mask;   // movemask_epi8 returns a 32 bits values
+  ind_t iblock, gen, bound;
+
+public:
+
+  generator_iter(const monoid &mon);
+  bool move_next();
+  uint8_t count();
+  inline ind_t get_gen() const {return gen; };
+};
+
+
+
+///////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////
+////////////////   Implementation part here for inlining   ////////////////
+///////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////
+
+/*
+Note: for some reason the code using gcc vector variables is 2-3% faster than
+the code using gcc intrinsic instructions.
+Here are the results of two runs:
+data_mmx =  [9.757, 9.774, 9.757, 9.761, 9.811, 9.819, 9.765, 9.888, 9.774, 9.771]
+data_epi8 = [9.592, 9.535, 9.657, 9.468, 9.460, 9.679, 9.458, 9.461, 9.629, 9.474]
+*/
+
+extern inline epi8 load_unaligned_epi8(const uint8_t *t)
+{ return (epi8) _mm_loadu_si128((__m128i *) (t)); };
+
+extern inline epi8 shuffle_epi8(epi8 b, epi8 sh)    // Require SSE 3
+{ return (epi8) _mm_shuffle_epi8((__m128i) b, (__m128i) sh);}
+
+extern inline int movemask_epi8(epi8 b)
+{ return _mm_movemask_epi8((__m128i) b);};
+
+const epi8 zero   = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+const epi8 block1 = zero + 1;
+const uint8_t m1 = 255;
+const epi8 shift16[16] =
+  { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
+    {m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14},
+    {m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13},
+    {m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12},
+    {m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11},
+    {m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10},
+    {m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+    {m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8},
+    {m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7},
+    {m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6},
+    {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5},
+    {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4},
+    {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3},
+    {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2},
+    {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1},
+    {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0} };
+const epi8 mask16[16] =
+  { {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1} };
+
+
+template<> inline generator_iter<ALL>::generator_iter(const monoid &mon)
+  : m(mon), bound((mon.conductor+mon.min+15) >> 4)
+{
+  epi8 block;
+  iblock = 0;
+  block = m.blocks[0];
+  mask  = movemask_epi8(block == block1);
+  mask &= 0xFFFE; // 0 is not a generator
+  gen = - 1;
+};
+
+template<> inline generator_iter<CHILDREN>::generator_iter(const monoid &mon)
+  : m(mon), bound((mon.conductor+mon.min+15) >> 4)
+{
+  epi8 block;
+  iblock = m.conductor >> 4;
+  block = m.blocks[iblock] & mask16[m.conductor & 0xF];
+  mask  = movemask_epi8(block == block1);
+  gen = (iblock << 4) - 1;
+};
+
+template <class T> inline uint8_t generator_iter<T>::count()
+{
+  uint8_t nbr = _mm_popcnt_u32(mask); // popcnt returns a 8 bits value
+  for (ind_t ib = iblock+1; ib < bound; ib++)
+    nbr += _mm_popcnt_u32(movemask_epi8(m.blocks[ib] == block1));
+  return nbr;
+};
+
+template <class T> inline bool generator_iter<T>::move_next()
+{
+  while (!mask)
+    {
+      iblock++;
+      if (iblock > bound) return false;
+      gen = (iblock << 4) - 1;
+      mask  = movemask_epi8(m.blocks[iblock] == block1);
+    }
+  unsigned char shift = __bsfd (mask) + 1; // Bit Scan Forward
+  gen += shift;
+  mask >>= shift;
+  return true;
+};
+
+
+
+inline __attribute__((always_inline))
+void copy_blocks(dec_blocks &dst, dec_blocks const &src)
+{
+  for (ind_t i=0; i<NBLOCKS; i++) dst[i] = src[i];
+}
+
+
+#include <cassert>
+
+inline __attribute__((always_inline))
+void remove_generator(monoid &__restrict__ dst,
+		      const monoid &__restrict__ src,
+		      ind_t gen)
+{
+  ind_t start_block, decal;
+  epi8 block;
+
+  assert(src.decs[gen] == 1);
+
+  dst.conductor = gen + 1;
+  dst.genus = src.genus + 1;
+  dst.min = (gen == src.min) ? dst.conductor : src.min;
+
+  copy_blocks(dst.blocks, src.blocks);
+
+  start_block = gen >> 4;
+  decal = gen & 0xF;
+  // Shift block by decal uchar
+  block = shuffle_epi8(src.blocks[0], shift16[decal]);
+  dst.blocks[start_block] -= ((block != zero) & block1);
+#if NBLOCKS >= 5
+
+#define CASE_UNROLL(i_loop)			\
+  case i_loop : \
+    dst.blocks[i_loop+1] -= (load_unaligned_epi8(srcblock) != zero) & block1; \
+    srcblock += sizeof(epi8);
+
+  {
+    const uint8_t *srcblock = src.decs + sizeof(epi8) - decal;
+  switch(start_block)
+    {
+      CASE_UNROLL(0);
+#if NBLOCKS > 2
+      CASE_UNROLL(1);
+#endif
+#if NBLOCKS > 3
+      CASE_UNROLL(2);
+#endif
+#if NBLOCKS > 4
+      CASE_UNROLL(3);
+#endif
+#if NBLOCKS > 5
+      CASE_UNROLL(4);
+#endif
+#if NBLOCKS > 6
+      CASE_UNROLL(5);
+#endif
+#if NBLOCKS > 7
+      CASE_UNROLL(6);
+#endif
+#if NBLOCKS > 8
+      CASE_UNROLL(7);
+#endif
+#if NBLOCKS > 9
+      CASE_UNROLL(8);
+#endif
+#if NBLOCKS > 10
+      CASE_UNROLL(9);
+#endif
+#if NBLOCKS > 11
+      CASE_UNROLL(10);
+#endif
+#if NBLOCKS > 12
+      CASE_UNROLL(11);
+#endif
+#if NBLOCKS > 13
+      CASE_UNROLL(12);
+#endif
+#if NBLOCKS > 14
+      CASE_UNROLL(13);
+#endif
+#if NBLOCKS > 15
+      CASE_UNROLL(14);
+#endif
+#if NBLOCKS > 16
+#error "Too many blocks"
+#endif
+    }
+  }
+#else
+#warning "Loop not unrolled"
+
+  for (auto i=start_block+1; i<NBLOCKS; i++)
+    {
+      // The following won't work due to some alignment problem (bug in GCC 4.7.1?)
+      // block = *((epi8*)(src.decs + ((i-start_block)<<4) - decal));
+      block = load_unaligned_epi8(src.decs + ((i-start_block)<<4) - decal);
+      dst.blocks[i] -= ((block != zero) & block1);
+    }
+#endif
+
+  assert(dst.decs[dst.conductor-1] == 0);
+}
+
+inline monoid remove_generator(const monoid &src, ind_t gen)
+{
+  monoid dst;
+  remove_generator(dst, src, gen);
+  return dst;
+}
+
+#endif

+ 27 - 0
monoid.pxd

@@ -0,0 +1,27 @@
+from libc.stdint cimport uint8_t, uint_fast64_t
+
+cdef extern from "monoid.hpp":
+    enum: cMAX_GENUS "MAX_GENUS"
+    enum: cSIZE "SIZE"
+    ctypedef uint_fast64_t ind_t
+    ctypedef uint8_t dec_numbers[cSIZE]
+    cdef cppclass monoid:
+        dec_numbers decs
+        ind_t conductor, min, genus
+
+    void init_full_N(monoid &pm) nogil
+    void print_monoid(const monoid &) nogil
+    inline void remove_generator(monoid &dst, const monoid &src, ind_t) nogil
+    inline monoid remove_generator(const monoid &src, ind_t) nogil
+
+    # Fake type since Cython only allows for instanciation of template by types.
+    cdef cppclass ALL:
+        pass
+    cdef cppclass CHILDREN:
+        pass
+
+    cdef cppclass generator_iter[T]:
+        generator_iter(const monoid &mon) nogil
+        bint move_next() nogil
+        uint8_t count() nogil
+        ind_t get_gen() nogil

+ 64 - 0
mred.sage

@@ -0,0 +1,64 @@
+import numeric_monoid as nm
+from sage.parallel.map_reduce import RESetMapReduce as MR
+
+class CGenCount(MR):
+    def __init__(self, g):
+        MR.__init__(self)
+        self._g = g
+    def roots(self):
+        return [nm.Full]
+    def children(self, x):
+        return x.children() if x.genus() < self._g else []
+    def post_process(self, x):
+        return 1 if x.genus() == self._g else None
+
+class CGenCountMixed(MR):
+    def __init__(self, seuil, g):
+        MR.__init__(self)
+        self._seuil = seuil
+        self._g = g
+    def reduce_init(self):
+        return vector(ZZ, self._g)
+    def roots(self):
+        return [nm.Full]
+    def children(self, x):
+        return x.children() if x.genus() < self._seuil else []
+    def map_function(self, x):
+        if x.genus() < self._seuil:
+            res = vector(ZZ, self._g)
+            res[x.genus()] = 1
+            return res
+        # print "start"
+        res = x.walk_children(self._g)
+        # print "end"
+        return vector(ZZ, res)
+
+class CGen(CGenCount):
+    def __init__(self, g):
+        MR.__init__(self)
+        self._g = g
+    def roots(self):
+        return [nm.Full]
+    def children(self, x):
+        return x.children() if x.genus() < self._g else []
+    def post_process(self, x):
+        return x if x.genus() == self._g else None
+    def reduce_init(self):
+        return []
+    def map_function(self, x):
+        return [x]
+
+
+# def cgen(g):
+#     calc = MR(
+#         roots=[nm.Full],
+#         children=lambda x : x.children() if x.genus() < g else [],
+#         post_process=lambda x : x if x.genus() == g else None,
+#         reduce_init=[],
+#         map_function=lambda x : [x]
+#     )
+#     return calc.run()
+
+
+
+

+ 39 - 0
numeric_monoid.pxd

@@ -0,0 +1,39 @@
+from monoid cimport *
+from sage.structure.sage_object cimport SageObject
+
+cdef class MonoidList(object)
+
+cdef class NumericMonoid(SageObject):
+    cdef monoid _m
+
+    cpdef int genus(self)
+    cpdef int min(self)
+    cpdef int conductor(self)
+    cpdef _print(self)
+    cpdef NumericMonoid remove_generator(self, unsigned int gen)
+    cpdef int count_children(self)
+    cpdef list children(self)
+    cpdef list children_generators(self)
+    cpdef list successors(self)
+    cpdef list successor_generators(self)
+    cpdef list nth_generation(self, unsigned int n)
+    cpdef list generators(self)
+    cpdef list elements(self)
+    cpdef list gaps(self)
+    cpdef unsigned char[:] _decomposition_numbers(self)
+    cpdef list multiplicities(self)
+    cpdef list walk_children_stack(self, int bound)
+    cpdef list walk_children(self, int bound)
+    cpdef MonoidList nth_generation_cilk(self, unsigned int genus)
+
+cpdef NumericMonoid _from_pickle(type typ, int sz, int cond, int mn, int genus, tuple decs)
+
+from libcpp.list cimport list as stl_list
+
+cdef class MonoidList(object):
+    cdef stl_list[monoid] _l
+
+cdef class MonoidListIterator(object):
+    cdef MonoidList _ml
+    cdef stl_list[monoid].iterator _it, _end
+

+ 716 - 0
numeric_monoid.pyx

@@ -0,0 +1,716 @@
+r"""
+Recompile with::
+
+    sage -python setup.py build_ext --inplace
+
+Due to compilation outside Sage, the option `--force_lib` has ot be given for
+doctests to prevent Sage for recompiling this file. The doctest command is::
+
+    sage -t --force-lib numeric_monoid.pyx`
+
+And the each doctest series should start with::
+
+    sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+
+The following bound are fixed at compile time::
+
+    sage: SIZE
+    256
+    sage: MAX_GENUS
+    86
+
+.. warning::
+
+    Due to modular arithmetic the 255-th decomposition number of the full
+    monoid is considered as 0 instead of 256.
+"""
+import cython
+from sage.rings.integer cimport Integer
+from sage.rings.integer import GCD_list
+from sage.structure.sage_object cimport SageObject
+
+include 'cysignals/signals.pxi'
+
+from monoid cimport *
+from treewalk cimport *
+
+SIZE = cSIZE
+MAX_GENUS = cMAX_GENUS
+
+print_gen = True
+
+cdef class NumericMonoid(object):
+    r"""
+    sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+    sage: Full
+    < 1 >
+    """
+
+    def __init__(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: NumericMonoid()
+        < 1 >
+        """
+        init_full_N(self._m)
+
+    cpdef int genus(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.genus()
+        0
+        sage: NumericMonoid.from_generators([3,5]).genus()
+        4
+        """
+        return self._m.genus
+
+    cpdef int min(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.min()
+        1
+        sage: NumericMonoid.from_generators([3,5]).min()
+        3
+        """
+        return self._m.min
+
+    cpdef int conductor(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.conductor()
+        1
+        sage: NumericMonoid.from_generators([3,5]).conductor()
+        8
+        """
+        return self._m.conductor
+
+    cpdef _print(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full._print()
+        min = 1, cond = 1, genus = 0, decs = 1 1 2 2 3 3 4 4 ...  128 128
+        sage: NumericMonoid.from_generators([3,5])._print()
+        min = 3, cond = 8, genus = 4, decs = 1 0 0 1 0 1 2 0 2 2 2 3 3 3 4 4 5 5 ... 124 124
+        """
+        print_monoid(self._m)
+
+    def __contains__(self, unsigned int i):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: m = NumericMonoid.from_generators([3,5])
+        sage: [i in m for i in range(10)]
+        [True, False, False, True, False, True, True, False, True, True]
+        sage: 1000 in m
+        True
+        """
+        if i > self._m.conductor:
+            return True
+        return self._m.decs[i] != 0
+
+    cpdef list elements(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.elements()
+        [0, 1]
+        sage: NumericMonoid.from_generators([3,5]).elements()
+        [0, 3, 5, 6, 8]
+        """
+        cdef list res = []
+        cdef ind_t i
+        for i in range(self._m.conductor+1):
+            if self._m.decs[i] > 0:
+                res.append(int(i))
+        return res
+
+    cpdef list gaps(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.gaps()
+        []
+        sage: NumericMonoid.from_generators([3,5]).gaps()
+        [1, 2, 4, 7]
+        sage: all(len(m.gaps())==m.genus() for i in range(6) for m in Full.nth_generation(i))
+        True
+        """
+        cdef list res = []
+        cdef ind_t i
+        for i in range(self._m.conductor):
+            if self._m.decs[i] == 0:
+                res.append(int(i))
+        return res
+
+    def __repr__(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full                            # indirect doctest
+        < 1 >
+        sage: NumericMonoid.from_generators([3,5])   # indirect doctest
+        < 3 5 >
+        """
+        cdef str res
+        cdef ind_t i
+        if print_gen:
+            return "< "+" ".join(str(i) for i in self.generators())+" >"
+        else:
+            res = "Mon("
+            for i in range(self._m.conductor+2):
+                if self._m.decs[i] > 0:
+                   res += "%i "%i
+            return res+"...)"
+
+    def _latex_(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full._latex_()
+        '\\langle\\,1\\,\\rangle'
+        sage: NumericMonoid.from_generators([3,5])._latex_()
+        '\\langle\\,3\\,5\\,\\rangle'
+        """
+        cdef int i
+        if print_gen:
+            return "\\langle\\,"+"\\,".join(str(i) for i in self.generators())+"\\,\\rangle"
+
+    def __reduce__(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.__reduce__()
+        (<built-in function _from_pickle>, (<type 'numeric_monoid.NumericMonoid'>, 256, 1L, 1L, 0L, (1, 1, 2, 2, 3, 3, 4, 4, ..., 128)))
+        sage: NumericMonoid.from_generators([3,5]).__reduce__()
+        (<built-in function _from_pickle>, (<type 'numeric_monoid.NumericMonoid'>, 256, 8L, 3L, 4L, (1, 0, 0, 1, 0, 1, 2, 0, 2, 2, ..., 124, 124)))
+        """
+        return (_from_pickle,
+                (NumericMonoid, cSIZE,
+                 self._m.conductor, self._m.min, self._m.genus,
+                 tuple(self._decomposition_numbers())))
+
+
+    # < 0    <= 1    == 2    != 3    > 4    >= 5
+    def __richcmp__(NumericMonoid self, NumericMonoid other, int op):
+        """
+        Inclusion Order
+
+        EXAMPLE::
+
+            sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+            sage: m1 = NumericMonoid.from_generators([3,5,7])
+            sage: Full == Full, Full != Full, Full == m1, Full != m1
+            (True, False, False, True)
+            sage: m1 == m1, m1 != m1, m1 < m1, m1 <= m1, m1 > m1, m1 >= m1
+            (True, False, False, True, False, True)
+            sage: Full < m1, m1 > Full, Full > m1, m1 < Full
+            (False, False, True, True)
+        """
+        cdef bint eq, incl
+        eq = (self._m.conductor ==  other._m.conductor and
+              self._m.min ==  other._m.min and
+              self.genus ==  other.genus and
+              all(self._m.decs[i] == other._m.decs[i] for i in range(cSIZE)))
+        if   op == 2: return eq
+        elif op == 3: return not eq
+        elif op < 2:
+            incl = all(i in other for i in self.generators())
+            if   op == 0: return incl and not eq
+            elif op == 1: return incl
+        else:
+            incl = all(i in self for i in other.generators())
+            if   op == 4: return incl and not eq
+            elif op == 5: return incl
+
+    def __hash__(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.__hash__()
+        7122582034698508706
+        sage: hash(NumericMonoid.from_generators([3,5,7]))
+        7609276871119479011
+        sage: len(set(Full.nth_generation(4)))
+        7
+        """
+        return hash((self.conductor(), self.min(), self.genus(),
+                     tuple(self._decomposition_numbers())))
+
+
+    cpdef NumericMonoid remove_generator(self, unsigned int gen):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.remove_generator(1)
+        < 2 3 >
+        sage: NumericMonoid.from_generators([3,5,7]).remove_generator(7)
+        < 3 5 >
+        sage: NumericMonoid.from_generators([3,5,7]).remove_generator(8)
+        Traceback (most recent call last):
+        ...
+        ValueError: 8 is not a generator for < 3 5 7 >
+        """
+        cdef NumericMonoid res
+        res = NumericMonoid.__new__(NumericMonoid)
+        if gen > cSIZE or self._m.decs[gen] != 1:
+            raise ValueError, "%i is not a generator for %s"%(gen, self)
+        remove_generator(res._m, self._m, gen)
+        return res
+
+    cpdef list generators(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.generators()
+        [1]
+        sage: NumericMonoid.from_generators([3,5,7]).generators()
+        [3, 5, 7]
+        """
+        cdef list res = []
+        cdef int gen
+        cdef generator_iter[ALL] *iter = new generator_iter[ALL](self._m)
+        while iter.move_next():
+            res.append(int(iter.get_gen()))
+        del iter
+        return res
+
+    cpdef int count_children(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.count_children()
+        1
+        sage: NumericMonoid.from_generators([3,5,7]).count_children()
+        2
+        """
+        cdef int res
+        cdef generator_iter[CHILDREN] *iter = new generator_iter[CHILDREN](self._m)
+        res = iter.count()
+        del iter
+        return res
+
+    cpdef list children(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.children()
+        [< 2 3 >]
+        sage: NumericMonoid.from_generators([3,5,7]).children()
+        [< 3 7 8 >, < 3 5 >]
+        """
+        cdef list res = []
+        cdef generator_iter[CHILDREN] *iter = new generator_iter[CHILDREN](self._m)
+        while iter.move_next():
+            res.append(self.remove_generator(iter.get_gen()))
+        del iter
+        return res
+
+    cpdef list children_generators(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.children_generators()
+        [1]
+        sage: NumericMonoid.from_generators([3,5,7]).children_generators()
+        [5, 7]
+        """
+        cdef list res = []
+        cdef int gen
+        cdef generator_iter[CHILDREN] *iter = new generator_iter[CHILDREN](self._m)
+        while iter.move_next():
+            res.append(int(iter.get_gen()))
+        del iter
+        return res
+
+    cpdef list successors(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.successors()
+        [< 2 3 >]
+        sage: NumericMonoid.from_generators([3,5,7]).successors()
+        [< 5 6 7 8 9 >, < 3 7 8 >, < 3 5 >]
+        """
+        cdef list res = []
+        cdef generator_iter[ALL] *iter = new generator_iter[ALL](self._m)
+        while iter.move_next():
+            # Straigten a monoid obtained by removing an non children generators
+            try:
+                newmon = self.from_generators(self.remove_generator(iter.get_gen()).generators())
+            except ValueError:
+                pass # Ignore the result if GCD is not 1 anymore.
+            else:
+                res.append(newmon)
+        del iter
+        return res
+
+    cpdef list successor_generators(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.successor_generators()
+        [1]
+        sage: NumericMonoid.from_generators([3,5,7]).successor_generators()
+        [3, 5, 7]
+        """
+        cdef list res = []
+        cdef int gen
+        cdef generator_iter[ALL] *iter = new generator_iter[ALL](self._m)
+        while iter.move_next():
+            res.append(int(iter.get_gen()))
+        del iter
+        return res
+
+    cpdef list nth_generation(self, unsigned int genus):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.nth_generation(3)
+        [< 4 5 6 7 >, < 3 5 7 >, < 3 4 >, < 2 7 >]
+        sage: NumericMonoid.from_generators([3,5,7]).nth_generation(8)
+        [< 3 13 14 >, < 3 11 16 >, < 3 10 17 >]
+        """
+        cdef ind_t i
+        cdef list lst = [self]
+        for i in range(self._m.genus, genus):
+            lst = [x for m in lst for x in m.children()]
+        return lst
+
+
+    cpdef MonoidList nth_generation_cilk(self, unsigned int genus):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: list(Full.nth_generation_cilk(0))
+        [< 1 >]
+        sage: list(Full.nth_generation_cilk(1))
+        [< 2 3 >]
+        sage: list(Full.nth_generation_cilk(2))
+        [< 3 4 5 >, < 2 5 >]
+        sage: list(Full.nth_generation_cilk(4))
+        [< 5 6 7 8 9 >, < 4 6 7 9 >, < 4 5 7 >, < 4 5 6 >, < 3 7 8 >, < 3 5 >, < 2 9 >]
+        """
+        cdef MonoidList res = MonoidList.__new__(MonoidList)
+        cilk_list_results.get_reference().clear()
+        sig_on()
+        with nogil:
+            list_children(self._m, genus)
+        sig_off()
+        res._l = cilk_list_results.get_value()
+        cilk_list_results.get_reference().clear()
+        return res
+
+    # don't know how to make it readonly !
+    cpdef unsigned char[:] _decomposition_numbers(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full._decomposition_numbers()
+        <MemoryView of 'array' at 0x...>
+        sage: len(list(Full._decomposition_numbers())) == 256
+        True
+        sage: len(list(NumericMonoid.from_generators([3,5,7])._decomposition_numbers())) == 256
+        True
+        """
+        cdef unsigned char[:] slice = (<unsigned char[:cSIZE]>
+                                        <unsigned char *> self._m.decs)
+        return slice
+
+    cpdef list multiplicities(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.multiplicities() == range(1, 257)
+        True
+        sage: NumericMonoid.from_generators([3,5,7]).multiplicities()
+        [1, 0, 0, 2, 0, 2, 3, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, ..., 249, 250]
+
+        """
+        cdef unsigned char[:] decs = self._decomposition_numbers()
+        cdef int i
+        cdef int mults[cSIZE]
+        cdef int[:] mults_view = mults
+        mults[0] = 1
+        for i in range(1, cSIZE):
+            mults[i] = decs[i] << 1
+            if i % 2 == 0 and mults[i/2] != 0:
+                mults[i] -= 1
+        return list(mults_view)
+
+    @classmethod
+    def from_generators(cls, list l):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: NumericMonoid.from_generators([1])
+        < 1 >
+        sage: NumericMonoid.from_generators([3,5,7])
+        < 3 5 7 >
+        sage: NumericMonoid.from_generators([3,5,8])
+        < 3 5 >
+        sage: NumericMonoid.from_generators([3,6])
+        Traceback (most recent call last):
+        ...
+        ValueError: gcd of generators must be 1
+        """
+        cdef int i
+        cdef set gens = {int(i) for i in l}
+        cdef NumericMonoid res = cls()
+        cdef generator_iter[CHILDREN] *iter = new generator_iter[CHILDREN](res._m)
+        if GCD_list(l) != 1:
+            raise ValueError, "gcd of generators must be 1"
+        while iter.move_next():
+            gen = iter.get_gen()
+            if gen not in gens:
+                res = res.remove_generator(gen)
+                del iter
+                iter = new generator_iter[CHILDREN](res._m)
+        del iter
+        return res
+
+    def gcd_small_generator(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.gcd_small_generator()
+        0
+        sage: NumericMonoid.from_generators([3,5,7]).gcd_small_generator()
+        3
+        """
+        if self._m.min == 1:     # self == Full
+            return Integer(0)
+        return GCD_list([i for i in self.generators() if i < self.conductor()])
+
+    def has_finite_descendance(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.has_finite_descendance()
+        False
+        sage: NumericMonoid.from_generators([3,5,7]).has_finite_descendance()
+        False
+        sage: NumericMonoid.from_generators([3,7]).has_finite_descendance()
+        True
+        """
+        return self.gcd_small_generator() == 1
+
+    def support_generating_series(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.support_generating_series()
+        -1/(x - 1)
+        sage: NumericMonoid.from_generators([3,5,7]).support_generating_series()
+        -(x^5 - x^4 + x^3 - x + 1)/(x - 1)
+        sage: NumericMonoid.from_generators([3,7]).support_generating_series()
+        -(x^12 - x^11 + x^9 - x^8 + x^6 - x^4 + x^3 - x + 1)/(x - 1)
+        """
+        from sage.calculus.var import var
+        x = var('x')
+        cdef int c = self.conductor()
+        return (sum(x**i for i in range(c) if i in self) +
+                x**c/(1-x)).normalize()
+
+    def multiplicities_generating_series(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.multiplicities_generating_series()
+        (x - 1)^(-2)
+        sage: NumericMonoid.from_generators([3,5,7]).multiplicities_generating_series()
+        (x^5 - x^4 + x^3 - x + 1)^2/(x - 1)^2
+        sage: NumericMonoid.from_generators([3,7]).multiplicities_generating_series()
+        (x^12 - x^11 + x^9 - x^8 + x^6 - x^4 + x^3 - x + 1)^2/(x - 1)^2
+
+        sage: bound = 20
+        sage: all( [(m.support_generating_series()^2).taylor(x, 0, bound-1).coefficient(x, i)
+        ...         for i in range(bound)] ==
+        ...        list(m.multiplicities())[:bound]
+        ...       for k in range(7) for m in Full.nth_generation(k))
+        True
+        """
+        return (self.support_generating_series()**2).normalize()
+
+    def generating_series(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.generating_series()
+        (x - 1)/(x*y + x - 1)
+        sage: NumericMonoid.from_generators([3,5,7]).generating_series()
+        (x - 1)/(x^5*y - x^4*y + x^3*y + x - 1)
+        sage: NumericMonoid.from_generators([3,7]).generating_series()
+        (x - 1)/(x^12*y - x^11*y + x^9*y - x^8*y + x^6*y - x^4*y + x^3*y + x - 1)
+        """
+        from sage.calculus.var import var
+        return 1/(1-var('y')*(self.support_generating_series()-1)).normalize()
+
+    def _test_monoid(self, **options):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: m = NumericMonoid.from_generators([3,5,7])
+        sage: m._test_monoid()
+        sage: m._decomposition_numbers()[2] = 1    # don't do this at home, kids
+        sage: m._test_monoid()
+        Traceback (most recent call last):
+        ...
+        AssertionError: wrong min
+        sage: m = NumericMonoid.from_generators([3,5,7])
+        sage: m._decomposition_numbers()[20] = 0    # don't do this at home, kids
+        sage: m._test_monoid()
+        Traceback (most recent call last):
+        ...
+        AssertionError: wrong genus
+        """
+        cdef ind_t i, genus = 0, size = cSIZE
+        tester = self._tester(**options)
+        if self._m.conductor == 1:
+            tester.assertEqual(self._m.min, 1, "wrong min")
+        tester.assertTrue(all(self._m.decs[i] == 0 for i in range(1, self._m.min)),
+                          "wrong min")
+        for i in range(size):
+            if self._m.decs[i] == 0:
+                genus += 1
+        tester.assertEqual(self._m.genus, genus, "wrong genus")
+        tester.assertEqual(self._m.decs[0], 1, "wrong decs[0]")
+        if self._m.conductor != 1:
+            tester.assertEqual(self._m.decs[self._m.conductor-1], 0,
+                               "conductor in not minimal")
+        tester.assertTrue(all(self._m.decs[i] != 0
+                              for i in range(self._m.conductor, <unsigned int>cSIZE)),
+                          "wrong conductor")
+
+    def _test_generators(self, **options):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full._test_generators()
+        sage: m = NumericMonoid.from_generators([3,5,7])
+        sage: m._test_generators()
+        sage: m._decomposition_numbers()[2] = 2    # don't do this at home, kids
+        sage: m._test_generators()
+        Traceback (most recent call last):
+        ...
+        AssertionError: wrong generators
+        """
+        tester = self._tester(**options)
+        tester.assertEqual(self, NumericMonoid.from_generators(self.generators()),
+                           "wrong generators")
+
+
+    cpdef list walk_children_stack(NumericMonoid self, int bound):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.walk_children_stack(5)
+        [1, 2, 4, 7, 12]
+        """
+        cdef results_type res
+        cdef int i
+        for i in range(bound):
+            res[i] = 0
+        sig_on()
+        with nogil:
+            walk_children_stack(self._m, bound, res)
+        sig_off()
+        return [int(res[i]) for i in range(bound)]
+
+    cpdef list walk_children(NumericMonoid self, int bound):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: Full.walk_children(15)
+        [1, 2, 4, 7, 12, 23, 39, 67, 118, 204, 343, 592, 1001, 1693, 2857]
+        """
+        cilk_results.reset()
+        sig_on()
+        with nogil:
+            walk_children(self._m, bound)
+        sig_off()
+        return [int(cilk_results[i]) for i in range(bound)]
+
+
+cpdef NumericMonoid _from_pickle(type typ, int sz, int cond, int mn, int genus, tuple decs):
+    r"""
+    sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+    sage: TestSuite(Full).run(verbose=True)                       # indirect doctest
+    running ._test_category() . . . pass
+    running ._test_generators() . . . pass
+    running ._test_monoid() . . . pass
+    running ._test_new() . . . pass
+    running ._test_not_implemented_methods() . . . pass
+    running ._test_pickling() . . . pass
+    sage: TestSuite(Full.remove_generator(1)).run()               # indirect doctest
+    sage: TestSuite(NumericMonoid.from_generators([3,5,7])).run() # indirect doctest
+    """
+    cdef NumericMonoid res
+    cdef int i
+
+    if sz != cSIZE:
+        raise ValueError, "mon is compiled with different size (pickle size=%i)"%sz
+    res = NumericMonoid.__new__(typ)
+    res._m.conductor = cond
+    res._m.min = mn
+    res._m.genus = genus
+    for i in range(cSIZE):
+        res._m.decs[i] = decs[i]
+    return res
+
+
+Full = NumericMonoid()
+
+
+cdef class MonoidList(object):
+    r"""
+    A wrapper for C++ STL lists of monoids.
+    """
+
+    def __init__(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: MonoidList()
+        Traceback (most recent call last):
+        ...
+        RuntimeError: You are not supposed to call init
+        """
+        raise RuntimeError, "You are not supposed to call init"
+
+    def __len__(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: len(Full.nth_generation_cilk(4))
+        7
+        """
+        return self._l.size()
+
+    def __iter__(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: iter(Full.nth_generation_cilk(4))
+        <numeric_monoid.MonoidListIterator object at 0x...>
+        """
+        return MonoidListIterator(self)
+
+cdef class MonoidListIterator(object):
+    r"""
+    A wrapper for C++ STL iterator for list of monoids.
+    """
+    def __cinit__(self, MonoidList l):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: MonoidListIterator(Full.nth_generation_cilk(0))
+        <numeric_monoid.MonoidListIterator object at 0x...>
+        """
+        self._ml = l  # keep a reference on _ml to prevent _ml._l from being deallocated
+        self._it = self._ml._l.begin()
+        self._end = self._ml._l.end()
+
+    def __next__(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: it = iter(Full.nth_generation_cilk(2))
+        sage: it.__next__()
+        < 3 4 5 >
+        sage: it.__next__()
+        < 2 5 >
+        sage: it.__next__()
+        Traceback (most recent call last):
+        ...
+        StopIteration
+
+        sage: it = iter(Full.nth_generation_cilk(10))
+        sage: it.__next__()
+        < 11 12 13 14 15 16 17 18 19 20 21 >
+        sage: it.__next__()
+        < 10 12 13 14 15 16 17 18 19 21 >
+        """
+        cdef NumericMonoid res = NumericMonoid.__new__(NumericMonoid)
+        if self._it != self._end:
+            res._m = cython.operator.dereference(self._it)
+            cython.operator.preincrement(self._it)
+            return res
+        else:
+            raise StopIteration
+
+    def __iter__(self):
+        r"""
+        sage: import os; os.sys.path.insert(0,os.path.abspath('.')); from numeric_monoid import *
+        sage: it = iter(Full.nth_generation_cilk(0))
+        sage: iter(it) is it
+        True
+        """
+        return self
+

+ 63 - 0
setup.py

@@ -0,0 +1,63 @@
+# call with
+# sage -python setup.py build_ext --inplace
+
+import os, sys, platform
+
+try:
+    CILK_ROOT = os.environ['CILK_ROOT']
+except KeyError:
+    raise EnvironmentError, "Please define the CILK_ROOT environment variable !"
+# setup the gcc/cilk compiler
+if 'CC' not in os.environ:
+    os.environ['CC'] = os.path.join(CILK_ROOT, 'bin', 'g++-5')
+
+
+from distutils.core import setup
+from distutils.extension import Extension
+from Cython.Distutils import build_ext
+from distutils.extension import Extension
+from sage.env import *
+
+#SAGE_INC = os.path.join(SAGE_LOCAL, 'include')
+#SAGE_C   = os.path.join(SAGE_SRC, 'c_lib', 'include')
+#SAGE_DEV = os.path.join(SAGE_ROOT, 'src')
+
+
+if platform.system()=="Darwin":
+    MARCH='-march=corei7'
+    MTUNE='-march=corei7'
+    CILK_LIB = os.path.join(CILK_ROOT, 'lib')
+else:
+    MARCH='-march=native'
+    MTUNE='-march=native'
+    CILK_LIB = os.path.join(CILK_ROOT, 'lib64')
+
+import Cython.Compiler.Options
+Cython.Compiler.Options.annotate = True
+
+sage_include = sage_include_directories()
+packs = [x for x in sage_include if x.endswith('site-packages')][0]
+sage_include.append(os.path.join(packs, 'cysignals'))
+
+#sage_include+=sys.path
+#sage_include.append('/home/data/Sage-Install/sage-7.2.beta2/local/lib/python2.7/site-packages/cysignals/')
+
+
+setup(
+    cmdclass = {'build_ext': build_ext},
+    ext_modules = [
+        Extension('numeric_monoid',
+                  sources = ['numeric_monoid.pyx', 'monoid.cpp', 'treewalk.cpp'],
+                  depends = ['numeric_monoid.pxd', 'monoid.hpp', 'treewalk.hpp'],
+                  language="c++",
+                  extra_compile_args = ['-std=c++11', '-O3',
+                                        MARCH, MTUNE,
+                                        '-fcilkplus'],
+                  define_macros = [('NDEBUG', '1'), ('MAX_GENUS','86')],
+                  include_dirs = sage_include, # [SAGE_C,SAGE_DEV],
+                  library_dirs = [CILK_LIB],
+                  runtime_library_dirs = [CILK_LIB],
+                  libraries = ['cilkrts'],
+                  ),
+        ])
+

+ 223 - 0
treewalk.cpp

@@ -0,0 +1,223 @@
+#include <iostream>
+#include <iomanip>
+#include <chrono>
+
+using namespace std;
+using namespace std::chrono;
+
+#include "treewalk.hpp"
+
+void walk_children_stack(monoid m, results_type res,results_type resq)
+{
+  monoid data[MAX_GENUS-1], *stack[MAX_GENUS], *current;
+  monoid **stack_pointer = stack + 1;
+
+  for (ind_t i=1; i<MAX_GENUS; i++) stack[i] = &(data[i-1]); // Nathann's trick to avoid copy
+  stack[0] = &m;
+  while (stack_pointer != stack)
+    {
+      --stack_pointer;
+      current = *stack_pointer;
+      treat(*current,res,resq);
+      if (current->genus < MAX_GENUS - 1)
+	{
+	  auto it = generator_iter<CHILDREN>(*current);
+	  while (it.move_next())
+	    {
+	      // exchange top with top+1
+	      stack_pointer[0] = stack_pointer[1];
+	      remove_generator(**stack_pointer, *current, it.get_gen());
+	      stack_pointer++;
+	    }
+	  *stack_pointer = current;
+	  
+	  	}
+      else
+	{
+
+	}
+    }
+}
+
+
+ResultsReducer cilk_results;
+ResultsReducer cilk_results_quotient;
+#ifndef STACK_BOUND
+#define STACK_BOUND 11
+#endif
+
+void walk_children(const monoid m)
+{
+
+  if (m.genus < MAX_GENUS - STACK_BOUND)
+    {
+      treat(m,cilk_results,cilk_results_quotient);
+      auto it = generator_iter<CHILDREN>(m);
+      while (it.move_next())
+	{
+	  cilk_spawn walk_children(remove_generator(m, it.get_gen()));
+	}
+
+     }
+  else
+    walk_children_stack(m, cilk_results.get_array(),cilk_results_quotient.get_array());
+}
+
+
+
+
+void walk_children_stack(monoid m, ind_t bound, results_type res,results_type resq)
+{
+  monoid data[bound], *stack[bound], *current;
+  monoid **stack_pointer = stack + 1;
+
+  for (ind_t i=1; i<bound; i++) stack[i] = &(data[i-1]); // Nathann's trick to avoid copy
+  stack[0] = &m;
+  while (stack_pointer != stack)
+    {
+      --stack_pointer;
+      current = *stack_pointer;
+       treat(*current,res,resq);
+      if (current->genus < bound - 1)
+	{
+	  auto it = generator_iter<CHILDREN>(*current);
+	  while (it.move_next())
+	    {
+	      // exchange top with top+1
+	      stack_pointer[0] = stack_pointer[1];
+	      remove_generator(**stack_pointer, *current, it.get_gen());
+	      stack_pointer++;
+	    }
+	  *stack_pointer = current;
+	}
+      else
+	{
+	}
+    }
+}
+
+void walk_children(const monoid &m, ind_t bound)
+{
+
+  
+  if (m.genus < bound - STACK_BOUND)
+    {
+      treat(m,cilk_results,cilk_results_quotient);
+      auto it = generator_iter<CHILDREN>(m);
+      while (it.move_next())
+	{
+	  cilk_spawn walk_children(remove_generator(m, it.get_gen()), bound);
+	}
+      
+     }
+  else
+    walk_children_stack(m, bound, cilk_results.get_array(),cilk_results_quotient.get_array());
+}
+
+#ifdef TBB
+#include <tbb/scalable_allocator.h>
+cilk::reducer_list_append<monoid, tbb::scalable_allocator<monoid>> cilk_list_results;
+#else
+cilk::reducer_list_append<monoid> cilk_list_results;
+cilk::reducer_list_append<monoid> cilk_list_results_quotient;
+#endif
+
+void list_children(const monoid &m, ind_t bound)
+{
+  if (m.genus < bound)
+    {
+      auto it = generator_iter<CHILDREN>(m);
+      while (it.move_next())
+	cilk_spawn list_children(remove_generator(m, it.get_gen()), bound);
+     }
+  else{
+    cilk_list_results.push_back(m);
+  }
+}
+
+
+#include <cpuid.h>
+#include <cilk/cilk_api.h>
+
+static void show_usage(string name)
+{
+  cerr << "Usage: " << name << " [-n <proc_number>] " << endl;
+}
+
+
+int main(int argc, char **argv)
+{
+  monoid N;
+  unsigned long int total = 0;
+  string nproc = "0";
+
+  if (argc != 1 and argc != 3) { show_usage(argv[0]); return 1; }
+  if (argc == 3)
+    {
+      if (string(argv[1]) != "-n")  { show_usage(argv[0]); return 1; }
+      nproc = argv[2];
+    }
+
+  unsigned int ax, bx, cx, dx;
+  if (!__get_cpuid(0x00000001, &ax, &bx, &cx, &dx))
+    {
+      cerr << "Unable to determine the processor type !" << endl;
+      return EXIT_FAILURE;
+    }
+  if (!(cx & bit_SSSE3))
+    {
+      cerr << "This programm require sse3 instructions set !" << endl;
+      return EXIT_FAILURE;
+    }
+  if (!(cx & bit_POPCNT))
+    {
+      cerr << "This programm require popcount instruction !" << endl;
+      return EXIT_FAILURE;
+    }
+
+  if (__cilkrts_set_param("nworkers", nproc.c_str() ) != __CILKRTS_SET_PARAM_SUCCESS)
+    cerr << "Failed to set the number of Cilk workers" << endl;
+
+  cout << "Computing number of numeric monoids for genus <= "
+       << MAX_GENUS << " and of quotient "<< QUOTIENT<< " using " << __cilkrts_get_nworkers() << " workers" << endl;
+  cout << "Sizeof(monoid) = " << sizeof(monoid) << endl;
+  auto begin = high_resolution_clock::now();
+  init_full_N(N);
+  walk_children(N);
+  auto end = high_resolution_clock::now();
+  duration<double> ticks = end-begin;
+
+  cout << endl << "============================" << endl << endl;
+  for (unsigned int i=0; i<MAX_GENUS; i++)
+    {
+      cout << cilk_results_quotient[i] << ",";
+      total += cilk_results_quotient[i];
+    }
+  cout << endl;
+  cout << "Total = " << total << endl;
+  cout << endl << "============================" << endl << endl;
+  total=0;
+  for (unsigned int i=0; i<MAX_GENUS; i++)
+    {
+      cout << cilk_results[i] << ",";
+      total += cilk_results[i];
+    }
+  cout << endl;
+  cout << "Total = " << total <<
+       ", computation time = " << std::setprecision(4) << ticks.count() << " s."  << endl;
+  return EXIT_SUCCESS;
+}
+
+void treat(const monoid &m,results_type res,results_type res_quotient){
+  res[m.genus] += 1;
+  if(m.quotient()==QUOTIENT){
+    res_quotient[m.genus] += 1;
+  }
+}
+
+void treat(const monoid &m,ResultsReducer& res,ResultsReducer& res_quotient){
+  res[m.genus] += 1;
+  if(m.quotient()==QUOTIENT){
+    res_quotient[m.genus] += 1;
+  }
+}

+ 52 - 0
treewalk.hpp

@@ -0,0 +1,52 @@
+#include <cilk/cilk.h>
+#include <cilk/reducer.h>
+#include <cilk/reducer_list.h>
+#include "monoid.hpp"
+
+typedef unsigned long int results_type[MAX_GENUS];
+void walk_children_stack(monoid m, results_type res,results_type resq);
+void walk_children_stack(monoid m, ind_t bound, results_type res,results_type resq);
+
+struct Results
+{
+  results_type values;
+  inline Results() {reset();};
+  inline void reset() {for(int i=0; i<MAX_GENUS; i++) values[i] = 0;};
+};
+
+class ResultsReducer
+{
+  struct Monoid: cilk::monoid_base<Results>
+  {
+    inline static void reduce (Results *left, Results *right){
+      for(auto i=0; i<MAX_GENUS; i++) left->values[i] += right->values[i];
+    }
+  };
+private:
+  cilk::reducer<Monoid> imp_;
+public:
+  ResultsReducer() : imp_() {};
+  inline unsigned long int & operator[](ind_t i) {
+    return imp_.view().values[i];
+  };
+  inline results_type &get_array() {return imp_.view().values;}
+  inline void reset() {imp_.view().reset();}
+};
+
+extern ResultsReducer cilk_results;
+extern ResultsReducer cilk_results_quotient;
+
+
+#ifdef TBB
+#include <tbb/scalable_allocator.h>
+extern cilk::reducer_list_append<monoid, tbb::scalable_allocator<monoid>> cilk_list_results;
+#else
+extern cilk::reducer_list_append<monoid> cilk_list_results;
+#endif
+
+
+void walk_children(const monoid m);
+void walk_children(const monoid &m, ind_t bound);
+void list_children(const monoid &m, ind_t bound);
+void treat(const monoid &m,results_type res,results_type res_quotient);
+void treat(const monoid &m,ResultsReducer& res,ResultsReducer& res_quotient);

+ 30 - 0
treewalk.pxd

@@ -0,0 +1,30 @@
+from monoid cimport *
+from libcpp.list cimport list as stl_list
+
+cdef extern from "cilk/reducer_list.h" namespace "cilk":
+    cdef cppclass reducer_list_append[T]:
+        reducer_list_append() nogil except +
+        reducer_list_append(stl_list[T] &) nogil except +
+        stl_list[T] get_value() nogil
+        stl_list[T] &get_reference() nogil
+        void set_value(stl_list[T] &) nogil
+        void push_back(T) nogil
+
+
+cdef extern from "treewalk.hpp":
+    ctypedef unsigned long int results_type[cMAX_GENUS]
+    void walk_children_stack(monoid m, results_type &res) nogil
+    void walk_children_stack(monoid m, ind_t bound, results_type &res) nogil
+
+    cdef cppclass ResultsReducer:
+        ResultsReducer()  nogil except +
+        unsigned long int & operator[](ind_t i) nogil
+        results_type &get_array() nogil
+        void reset() nogil
+
+    ResultsReducer cilk_results
+    reducer_list_append[monoid] cilk_list_results
+    void walk_children(const monoid &m) nogil
+    void walk_children(const monoid &m, ind_t bound) nogil
+    void list_children(const monoid &m, ind_t bound) nogil
+