semigroup.hpp 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. #ifndef SEMIGROUP_HPP
  2. #define SEMIGROUP_HPP
  3. #include "config.hpp"
  4. #include <cstdint>
  5. #include <fstream>
  6. #define SIZE_BOUND (3*(MAX_GENUS-1))
  7. #define NBLOCKS ((SIZE_BOUND+15) >> 4)
  8. #define SIZE (NBLOCKS << 4)
  9. // const uint_fast8_t MAX_GENUS = 40;
  10. // const uint_fast8_t SIZE_BOUND = (3*(MAX_GENUS-1));
  11. // const uint_fast8_t NBLOCKS = ((SIZE_BOUND+15) >> 4);
  12. // const uint_fast8_t SIZE = (NBLOCKS << 4);
  13. #include <x86intrin.h>
  14. typedef uint8_t epi8 __attribute__ ((vector_size (16)));
  15. typedef uint8_t dec_numbers[SIZE] __attribute__ ((aligned (16)));
  16. typedef epi8 dec_blocks[NBLOCKS];
  17. typedef uint_fast64_t ind_t; // The type used for array indexes
  18. using namespace std;
  19. struct Semigroup
  20. {
  21. union {
  22. dec_numbers decs;
  23. dec_blocks blocks;
  24. };
  25. // Dont use char as they have to be promoted to 64 bits to do pointer arithmetic.
  26. ind_t conductor, min, genus,left_primitive,left,e,wilf;
  27. };
  28. void init_full_N(Semigroup &);
  29. void init(Semigroup&,char,char,char,char*);
  30. void print_Semigroup(const Semigroup &);
  31. void print_Semigroup_gen(const Semigroup&);
  32. void print_epi8(epi8);
  33. void output(const Semigroup& m,fstream& f);
  34. void record(const Semigroup& S,fstream& f);
  35. inline void copy_blocks( dec_blocks &__restrict__ dst,
  36. const dec_blocks &__restrict__ src);
  37. inline void remove_generator(Semigroup &__restrict__ dst,
  38. const Semigroup &__restrict__ src,
  39. ind_t gen,ind_t pos);
  40. inline Semigroup remove_generator(const Semigroup &src, ind_t gen,ind_t pos);
  41. // typedef enum { ALL, CHILDREN } generator_type;
  42. class ALL {};
  43. class CHILDREN {};
  44. // template <generator_type T> class generator_iter
  45. template <class T> class generator_iter{
  46. private:
  47. const Semigroup &m;
  48. unsigned int mask; // movemask_epi8 returns a 32 bits values
  49. ind_t iblock, gen, bound;
  50. public:
  51. generator_iter(const Semigroup &mon);
  52. bool move_next();
  53. uint8_t count();
  54. inline ind_t get_gen() const {return gen; };
  55. };
  56. ///////////////////////////////////////////////////////////////////////////
  57. ///////////////////////////////////////////////////////////////////////////
  58. //////////////// Implementation part here for inlining ////////////////
  59. ///////////////////////////////////////////////////////////////////////////
  60. ///////////////////////////////////////////////////////////////////////////
  61. /*
  62. Note: for some reason the code using gcc vector variables is 2-3% faster than
  63. the code using gcc intrinsic instructions.
  64. Here are the results of two runs:
  65. data_mmx = [9.757, 9.774, 9.757, 9.761, 9.811, 9.819, 9.765, 9.888, 9.774, 9.771]
  66. data_epi8 = [9.592, 9.535, 9.657, 9.468, 9.460, 9.679, 9.458, 9.461, 9.629, 9.474]
  67. */
  68. extern inline epi8 load_unaligned_epi8(const uint8_t *t)
  69. { return (epi8) _mm_loadu_si128((__m128i *) (t)); };
  70. extern inline epi8 shuffle_epi8(epi8 b, epi8 sh) // Require SSE 3
  71. { return (epi8) _mm_shuffle_epi8((__m128i) b, (__m128i) sh);}
  72. extern inline int movemask_epi8(epi8 b)
  73. { return _mm_movemask_epi8((__m128i) b);};
  74. const epi8 zero = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
  75. const epi8 block1 = zero + 1;
  76. const uint8_t m1 = 255;
  77. const epi8 shift16[16] =
  78. { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
  79. {m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14},
  80. {m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13},
  81. {m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12},
  82. {m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11},
  83. {m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10},
  84. {m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
  85. {m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8},
  86. {m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7},
  87. {m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6},
  88. {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5},
  89. {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4},
  90. {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3},
  91. {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2},
  92. {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1},
  93. {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0} };
  94. const epi8 mask16[16] =
  95. { {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
  96. { 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
  97. { 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
  98. { 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
  99. { 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
  100. { 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
  101. { 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
  102. { 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1},
  103. { 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1},
  104. { 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1},
  105. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1},
  106. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1},
  107. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1},
  108. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1},
  109. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1},
  110. { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1} };
  111. template<> inline generator_iter<ALL>::generator_iter(const Semigroup &mon)
  112. : m(mon), bound((mon.conductor+mon.min+15) >> 4){
  113. epi8 block;
  114. iblock = 0;
  115. block = m.blocks[0];
  116. mask = movemask_epi8(block == block1);
  117. mask &= 0xFFFE; // 0 is not a generator
  118. gen = - 1;
  119. };
  120. template<> inline generator_iter<CHILDREN>::generator_iter(const Semigroup &mon)
  121. : m(mon), bound((mon.conductor+mon.min+15) >> 4){
  122. epi8 block;
  123. iblock = m.conductor >> 4;
  124. block = m.blocks[iblock] & mask16[m.conductor & 0xF];
  125. mask = movemask_epi8(block == block1);
  126. gen = (iblock << 4) - 1;
  127. };
  128. template <class T> inline uint8_t generator_iter<T>::count(){
  129. uint8_t nbr = _mm_popcnt_u32(mask); // popcnt returns a 8 bits value
  130. for (ind_t ib = iblock+1; ib < bound; ib++)
  131. nbr += _mm_popcnt_u32(movemask_epi8(m.blocks[ib] == block1));
  132. return nbr;
  133. };
  134. template <class T> inline bool generator_iter<T>::move_next(){
  135. while (!mask){
  136. iblock++;
  137. if (iblock > bound) return false;
  138. gen = (iblock << 4) - 1;
  139. mask = movemask_epi8(m.blocks[iblock] == block1);
  140. }
  141. unsigned char shift = __bsfd (mask) + 1; // Bit Scan Forward
  142. gen += shift;
  143. mask >>= shift;
  144. return true;
  145. };
  146. inline __attribute__((always_inline))
  147. void copy_blocks(dec_blocks &dst, dec_blocks const &src){
  148. for (ind_t i=0; i<NBLOCKS; i++) dst[i] = src[i];
  149. }
  150. #include <cassert>
  151. inline __attribute__((always_inline))
  152. void remove_generator(Semigroup &__restrict__ dst,
  153. const Semigroup &__restrict__ src,
  154. ind_t gen,
  155. ind_t pos){
  156. ind_t start_block, decal;
  157. epi8 block;
  158. assert(src.decs[gen] == 1);
  159. ind_t t=gen+1;
  160. dst.conductor = t;
  161. dst.genus = src.genus + 1;
  162. int delta;
  163. if(gen==src.min){
  164. dst.min=gen+1;
  165. delta=1;
  166. }
  167. else{
  168. dst.min=src.min;
  169. delta=(src.decs[gen+src.min]==2)?0:-1;
  170. }
  171. dst.e=src.e+delta;
  172. assert(dst.e==((gen==src.min)?src.e+1:((src.decs[gen+src.min]==2)?src.e:src.e-1)));
  173. ind_t k=gen-src.conductor;
  174. assert(dst.conductor==src.conductor+k+1);
  175. dst.left=src.left+k;
  176. dst.left_primitive=src.left_primitive+pos;
  177. //dst.wilf=dst.e*dst.left-dst.conductor;//src.wilf+delta*(src.left+k)-k-1;
  178. dst.wilf=src.wilf+delta*(src.left+k)+(src.e-1)*k-1;
  179. copy_blocks(dst.blocks, src.blocks);
  180. start_block = gen >> 4;
  181. decal = gen & 0xF;
  182. // Shift block by decal uchar
  183. block = shuffle_epi8(src.blocks[0], shift16[decal]);
  184. dst.blocks[start_block] -= ((block != zero) & block1);
  185. for (auto i=start_block+1; i<NBLOCKS; i++){
  186. // The following won't work due to some alignment problem (bug in GCC 4.7.1?)
  187. // block = *((epi8*)(src.decs + ((i-start_block)<<4) - decal));
  188. block = load_unaligned_epi8(src.decs + ((i-start_block)<<4) - decal);
  189. dst.blocks[i] -= ((block != zero) & block1);
  190. }
  191. assert(dst.decs[dst.conductor-1] == 0);
  192. }
  193. inline Semigroup
  194. remove_generator(const Semigroup &src, ind_t gen,ind_t pos){
  195. Semigroup dst;
  196. remove_generator(dst, src, gen,pos);
  197. return dst;
  198. }
  199. #endif