gKit2 light
rand123.h
Go to the documentation of this file.
1 
3 
4 #ifndef _RAND123_H
5 #define _RAND123_H
6 
7 #include <cstdint>
8 
9 
10 // philox 2x64 6 rounds
11 // rewritten from https://github.com/DEShawResearch/random123/blob/main/include/Random123/philox.h
12 struct Philox
13 {
14  Philox( ) : ctr(), key() { seed64(0xcf492074862957a3); }
15  Philox( const uint64_t s ) : ctr(), key() { seed64(s); }
16 
17  Philox( const unsigned hi, const unsigned lo= 0x40ba6a95 ) : ctr(), key() { seed(hi, lo); }
18  void seed( const unsigned hi, const unsigned lo= 0x40ba6a95 ) { seed64( uint64_t(hi) << 32 | uint64_t(lo) );}
19 
20  void seed64( const uint64_t s )
21  {
22  ctr= { { 0, 0x838ca1328d07d3ba } };
23  key= { { s } };
24  }
25 
26  Philox& index( const uint64_t i )
27  {
28  ctr= { { i, 0x838ca1328d07d3ba } };
29  return *this;
30  }
31 
32  unsigned sample( )
33  {
34  ctr.v[0]++;
35  return philox2x64<6>(ctr, key).v[0]; // 6 rounds
36  //~ return philox2x64<10>(ctr, key).v[0]; // 10 rounds
37  }
38 
39  unsigned sample_range( const unsigned range )
40  {
41  // Efficiently Generating a Number in a Range
42  // cf http://www.pcg-random.org/posts/bounded-rands.html
43  unsigned divisor= ((-range) / range) + 1; // (2^32) / range
44  if(divisor == 0) return 0;
45 
46  while(true)
47  {
48  unsigned x= sample() / divisor;
49  if(x < range) return x;
50  }
51  }
52 
53  // c++ interface
54  unsigned operator() ( ) { return sample(); }
55  static constexpr unsigned min( ) { return 0; }
56  static constexpr unsigned max( ) { return ~unsigned(0); }
57  typedef unsigned result_type;
58 
59 protected:
60  struct array2x64 { uint64_t v[2]; };
61  struct array1x64 { uint64_t v[1]; };
62 
63  array1x64 bumpkey( array1x64 key )
64  {
65  key.v[0] += 0x9E3779B97F4A7C15UL;
66  return key;
67  }
68 
69  uint64_t mulhilo64( uint64_t a, uint64_t b, uint64_t* hip )
70  {
71  __uint128_t product = __uint128_t(a) * __uint128_t(b);
72  *hip= uint64_t(product >> 64);
73  return uint64_t(product);
74  }
75 
76  array2x64 round( array2x64 ctr, array1x64 key )
77  {
78  uint64_t hi;
79  uint64_t lo = mulhilo64(0xD2B74407B1CE6E93UL, ctr.v[0], &hi);
80 
81  return { { hi^key.v[0]^ctr.v[1], lo } };
82  }
83 
84  template< int R >
85  array2x64 philox2x64( array2x64 ctr, array1x64 key )
86  {
87  if(R>0) { ctr = round(ctr, key); }
88  if(R>1) { key = bumpkey(key); ctr = round(ctr, key); }
89  if(R>2) { key = bumpkey(key); ctr = round(ctr, key); }
90  if(R>3) { key = bumpkey(key); ctr = round(ctr, key); }
91  if(R>4) { key = bumpkey(key); ctr = round(ctr, key); }
92  if(R>5) { key = bumpkey(key); ctr = round(ctr, key); }
93  if(R>6) { key = bumpkey(key); ctr = round(ctr, key); }
94  if(R>7) { key = bumpkey(key); ctr = round(ctr, key); }
95  if(R>8) { key = bumpkey(key); ctr = round(ctr, key); }
96  if(R>9) { key = bumpkey(key); ctr = round(ctr, key); }
97  if(R>10) { key = bumpkey(key); ctr = round(ctr, key); }
98  if(R>11) { key = bumpkey(key); ctr = round(ctr, key); }
99  if(R>12) { key = bumpkey(key); ctr = round(ctr, key); }
100  if(R>13) { key = bumpkey(key); ctr = round(ctr, key); }
101  if(R>14) { key = bumpkey(key); ctr = round(ctr, key); }
102  if(R>15) { key = bumpkey(key); ctr = round(ctr, key); }
103 
104  return ctr;
105  }
106 
107  array2x64 ctr;
108  array1x64 key;
109 };
110 
111 
112 // philox 2x32 6 rounds
113 // rewritten from https://github.com/DEShawResearch/random123/blob/main/include/Random123/philox.h
114 struct Philox32
115 {
116  Philox32( ) : n(), key() { seed64(0xcf492074862957a3); }
117  Philox32( const uint64_t s ) : n(), key() { seed(s); }
118  Philox32( const unsigned hi, const unsigned lo= 0x40ba6a95 ) : n(), key() { seed(hi, lo); }
119 
120  void seed( const unsigned hi, const unsigned lo= 0x40ba6a95 ) { seed64(uint64_t(hi) << 32 | uint64_t(lo)); }
121  void seed64( const uint64_t s )
122  {
123  n= 0;
124  key= { { unsigned(s >> 32) } };
125  }
126 
127  Philox32& index( const uint64_t i )
128  {
129  n= i;
130  return *this;
131  }
132 
133  unsigned sample( )
134  {
135  n++;
136  array2x32 ctr= { { unsigned(n >>32), unsigned(n) } };
137  return philox2x32<6>(ctr, key).v[0]; // 6 rounds
138  //~ return philox2x32<10>(ctr, key).v[0]; // 10 rounds
139  }
140 
141  unsigned sample_range( const unsigned range )
142  {
143  // Efficiently Generating a Number in a Range
144  // cf http://www.pcg-random.org/posts/bounded-rands.html
145  unsigned divisor= ((-range) / range) + 1; // (2^32) / range
146  if(divisor == 0) return 0;
147 
148  while(true)
149  {
150  unsigned x= sample() / divisor;
151  if(x < range) return x;
152  }
153  }
154 
155  // c++ interface
156  unsigned operator() ( ) { return sample(); }
157  static constexpr unsigned min( ) { return 0; }
158  static constexpr unsigned max( ) { return ~unsigned(0); }
159  typedef unsigned result_type;
160 
161 protected:
162  struct array2x32 { unsigned v[2]; };
163  struct array1x32 { unsigned v[1]; };
164 
165  array1x32 bumpkey( array1x32 key )
166  {
167  key.v[0] += 0x9E3779B9;
168  return key;
169  }
170 
171  unsigned mulhilo32( unsigned a, unsigned b, unsigned* hip )
172  {
173  uint64_t product = uint64_t(a) * uint64_t(b);
174  *hip= unsigned(product >> 32);
175  return unsigned(product);
176  }
177 
178  array2x32 round( array2x32 ctr, array1x32 key )
179  {
180  uint32_t hi, lo= mulhilo32(0xd256d193, ctr.v[0], &hi);
181 
182  return { { hi^key.v[0]^ctr.v[1], lo } };
183  }
184 
185  template< int R >
186  array2x32 philox2x32( array2x32 ctr, array1x32 key )
187  {
188  if(R>0) { ctr = round(ctr, key); }
189  if(R>1) { key = bumpkey(key); ctr = round(ctr, key); }
190  if(R>2) { key = bumpkey(key); ctr = round(ctr, key); }
191  if(R>3) { key = bumpkey(key); ctr = round(ctr, key); }
192  if(R>4) { key = bumpkey(key); ctr = round(ctr, key); }
193  if(R>5) { key = bumpkey(key); ctr = round(ctr, key); }
194  if(R>6) { key = bumpkey(key); ctr = round(ctr, key); }
195  if(R>7) { key = bumpkey(key); ctr = round(ctr, key); }
196  if(R>8) { key = bumpkey(key); ctr = round(ctr, key); }
197  if(R>9) { key = bumpkey(key); ctr = round(ctr, key); }
198  if(R>10) { key = bumpkey(key); ctr = round(ctr, key); }
199  if(R>11) { key = bumpkey(key); ctr = round(ctr, key); }
200  if(R>12) { key = bumpkey(key); ctr = round(ctr, key); }
201  if(R>13) { key = bumpkey(key); ctr = round(ctr, key); }
202  if(R>14) { key = bumpkey(key); ctr = round(ctr, key); }
203  if(R>15) { key = bumpkey(key); ctr = round(ctr, key); }
204 
205  return ctr;
206  }
207 
208  uint64_t n;
209  array1x32 key;
210 };
211 
212 
213 // threefry 2x32 13 rounds
214 // rewritten from https://github.com/DEShawResearch/random123/blob/main/include/Random123/threefry.h
215 struct Threefry
216 {
217  Threefry( ) : n(), key() { seed64(0xcf492074862957a3); }
218  Threefry( const uint64_t s ) : n(), key() { seed64(s); }
219  Threefry( const unsigned hi, const unsigned lo= 0xacac820c ) : n(), key() { seed(hi, lo); }
220 
221  void seed( const unsigned hi, const unsigned lo= 0xacac820c ) { seed64( uint64_t(hi) << 32 | uint64_t(lo) ); }
222  void seed64( const uint64_t s )
223  {
224  n= 0;
225  key= { unsigned(s >> 32), unsigned(s) };
226  }
227 
228  Threefry& index( const uint64_t i )
229  {
230  n= i;
231  return *this;
232  }
233 
234  unsigned sample( )
235  {
236  n++;
237  array2x32 ctr= { { unsigned(n >>32), unsigned(n) } };
238  return threefry2x32<13>(ctr, key).v[0];
239  }
240 
241  unsigned sample_range( const unsigned range )
242  {
243  // Efficiently Generating a Number in a Range
244  // cf http://www.pcg-random.org/posts/bounded-rands.html
245  unsigned divisor= ((-range) / range) + 1; // (2^32) / range
246  if(divisor == 0) return 0;
247 
248  while(true)
249  {
250  unsigned x= sample() / divisor;
251  if(x < range) return x;
252  }
253  }
254 
255  // c++ interface
256  unsigned operator() ( ) { return sample(); }
257  static constexpr unsigned min( ) { return 0; }
258  static constexpr unsigned max( ) { return ~unsigned(0); }
259  typedef unsigned result_type;
260 
261  struct array2x32 { unsigned v[2]; };
262 
263 private:
264  static constexpr unsigned R_32x2_0_0= 13;
265  static constexpr unsigned R_32x2_1_0= 15;
266  static constexpr unsigned R_32x2_2_0= 26;
267  static constexpr unsigned R_32x2_3_0= 6;
268  static constexpr unsigned R_32x2_4_0= 17;
269  static constexpr unsigned R_32x2_5_0= 29;
270  static constexpr unsigned R_32x2_6_0= 16;
271  static constexpr unsigned R_32x2_7_0= 24;
272 
273  unsigned rotl32( const unsigned x, const unsigned n ) { return (x << (n & 31)) | (x >> ((32 - n) & 31)); }
274 
275  template<int R >
276  array2x32 threefry2x32( array2x32 ctr, array2x32 k )
277  {
278  array2x32 x;
279  unsigned ks[3]= { 0, 0, 0x1BD11BDA };
280  for(int i= 0; i < 2; i++)
281  {
282  ks[i] = k.v[i];
283  x.v[i] = ctr.v[i];
284  ks[2] ^= k.v[i];
285  }
286 
287  x.v[0] += ks[0];
288  x.v[1] += ks[1];
289  if(R>0) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_0_0); x.v[1] ^= x.v[0]; }
290  if(R>1) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_1_0); x.v[1] ^= x.v[0]; }
291  if(R>2) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_2_0); x.v[1] ^= x.v[0]; }
292  if(R>3) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_3_0); x.v[1] ^= x.v[0]; }
293  if(R>3) { x.v[0] += ks[1]; x.v[1] += ks[2]; x.v[1] += 1; }
294  if(R>4) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_4_0); x.v[1] ^= x.v[0]; }
295  if(R>5) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_5_0); x.v[1] ^= x.v[0]; }
296  if(R>6) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_6_0); x.v[1] ^= x.v[0]; }
297  if(R>7) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_7_0); x.v[1] ^= x.v[0]; }
298  if(R>7) { x.v[0] += ks[2]; x.v[1] += ks[0]; x.v[1] += 2; }
299  if(R>8) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_0_0); x.v[1] ^= x.v[0]; }
300  if(R>9) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_1_0); x.v[1] ^= x.v[0]; }
301  if(R>10) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_2_0); x.v[1] ^= x.v[0]; }
302  if(R>11) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_3_0); x.v[1] ^= x.v[0]; }
303  if(R>11) { x.v[0] += ks[0]; x.v[1] += ks[1]; x.v[1] += 3; }
304  if(R>12) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_4_0); x.v[1] ^= x.v[0]; }
305  if(R>13) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_5_0); x.v[1] ^= x.v[0]; }
306  if(R>14) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_6_0); x.v[1] ^= x.v[0]; }
307  if(R>15) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_7_0); x.v[1] ^= x.v[0]; }
308  if(R>15) { x.v[0] += ks[1]; x.v[1] += ks[2]; x.v[1] += 4; }
309  if(R>16) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_0_0); x.v[1] ^= x.v[0]; }
310  if(R>17) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_1_0); x.v[1] ^= x.v[0]; }
311  if(R>18) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_2_0); x.v[1] ^= x.v[0]; }
312  if(R>19) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_3_0); x.v[1] ^= x.v[0]; }
313  if(R>19) { x.v[0] += ks[2]; x.v[1] += ks[0]; x.v[1] += 5; }
314 
315  return x;
316  }
317 
318  uint64_t n;
319  array2x32 key;
320 };
321 
322 
323 #ifdef __AES__
324 #include <x86intrin.h>
325 
326 // ars 1x128 5 rounds
327 // rewritten from https://github.com/DEShawResearch/random123/blob/main/include/Random123/ars.h
328 // utilise les instructions AES, compiler avec g++ -maes ou g++ -march=native
329 struct Ars
330 {
331  Ars( ) : ctr(), key() { seed64(0xcf492074862957a3); }
332  Ars( const uint64_t s ) : ctr(), key() { seed64(s); }
333  Ars( const unsigned hi, const unsigned lo= 0xacac820c ) : ctr(), key() { seed(hi, lo); }
334 
335  void seed( const unsigned hi, const unsigned lo= 0xacac820c ) { seed64( uint64_t(hi) << 32 | uint64_t(lo) ); }
336  void seed64( const uint64_t s )
337  {
338  ctr= { 0, 0x838ca1328d07d3ba };
339  key= { s, 0xcf492074862957a3 };
340  }
341 
342  Ars& index( const uint64_t i )
343  {
344  ctr= { i, 0x838ca1328d07d3ba };
345  return *this;
346  }
347 
348  unsigned sample( )
349  {
350  array1x128 one= { 1, 0 };
351  ctr= _mm_add_epi64(ctr, one);
352 
353  array1x128 out= ars1x128<5>(ctr, key);
354  return _mm_extract_epi32(out.v, 3);
355  }
356 
357  unsigned sample_range( const unsigned range )
358  {
359  // Efficiently Generating a Number in a Range
360  // cf http://www.pcg-random.org/posts/bounded-rands.html
361  unsigned divisor= ((-range) / range) + 1; // (2^32) / range
362  if(divisor == 0) return 0;
363 
364  while(true)
365  {
366  unsigned x= sample() / divisor;
367  if(x < range) return x;
368  }
369  }
370 
371  // c++ interface
372  unsigned operator() ( ) { return sample(); }
373  static constexpr unsigned min( ) { return 0; }
374  static constexpr unsigned max( ) { return ~unsigned(0); }
375  typedef unsigned result_type;
376 
377 protected:
378  struct array1x128
379  {
380  __m128i v;
381 
382  array1x128( ) = default;
383  array1x128( const uint64_t hi, const uint64_t lo ) { v= _mm_set_epi64x(hi, lo); }
384  array1x128( const __m128i x ) { v= x; }
385  operator __m128i( ) const { return v; }
386  };
387 
388  template< int R >
389  array1x128 ars1x128( array1x128 ctr, array1x128 key )
390  {
391  __m128i kweyl = _mm_set_epi64x(0xBB67AE8584CAA73BUL, 0x9E3779B97F4A7C15UL);
392  __m128i k = key.v;
393  __m128i v = _mm_xor_si128(ctr.v, k);
394 
395  if(R>1) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
396  if(R>2) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
397  if(R>3) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
398  if(R>4) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
399  if(R>5) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
400  if(R>6) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
401  if(R>7) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
402  if(R>8) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
403  if(R>9) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
404 
405  k = _mm_add_epi64(k, kweyl);
406  v = _mm_aesenclast_si128(v, k);
407  return v;
408  }
409 
410  array1x128 ctr;
411  array1x128 key;
412 };
413 #else
414 struct Ars {}; // compiler avec g++ -maes
415 #endif
416 
417 
418 #endif
Point max(const Point &a, const Point &b)
renvoie la plus grande composante de chaque point. x, y, z= max(a.x, b.x), max(a.y,...
Definition: vec.cpp:35
Point min(const Point &a, const Point &b)
renvoie la plus petite composante de chaque point. x, y, z= min(a.x, b.x), min(a.y,...
Definition: vec.cpp:30
Definition: rand123.h:414
Definition: rand123.h:13