gKit2 light
Loading...
Searching...
No Matches
rand123.h
Go to the documentation of this file.
1
3
4#ifndef _RAND123_H
5#define _RAND123_H
6
7#include <cstdint>
8
9
10// philox 2x64 6 rounds
11// rewritten from https://github.com/DEShawResearch/random123/blob/main/include/Random123/philox.h
12struct Philox
13{
14 Philox( ) : ctr(), key() { seed64(0xcf492074862957a3); }
15 Philox( const uint64_t s ) : ctr(), key() { seed64(s); }
16
17 Philox( const unsigned hi, const unsigned lo= 0x40ba6a95 ) : ctr(), key() { seed(hi, lo); }
18 void seed( const unsigned hi, const unsigned lo= 0x40ba6a95 ) { seed64( uint64_t(hi) << 32 | uint64_t(lo) );}
19
20 void seed64( const uint64_t s )
21 {
22 ctr= { { 0, 0x838ca1328d07d3ba } };
23 key= { { s } };
24 }
25
26 Philox& index( const uint64_t i )
27 {
28 ctr= { { i, 0x838ca1328d07d3ba } };
29 return *this;
30 }
31
32 unsigned sample( )
33 {
34 ctr.v[0]++;
35 return philox2x64<6>(ctr, key).v[0]; // 6 rounds
36 //~ return philox2x64<10>(ctr, key).v[0]; // 10 rounds
37 }
38
39 unsigned sample_range( const unsigned range )
40 {
41 // Efficiently Generating a Number in a Range
42 // cf http://www.pcg-random.org/posts/bounded-rands.html
43 unsigned divisor= ((-range) / range) + 1; // (2^32) / range
44 if(divisor == 0) return 0;
45
46 while(true)
47 {
48 unsigned x= sample() / divisor;
49 if(x < range) return x;
50 }
51 }
52
53 // c++ interface
54 unsigned operator() ( ) { return sample(); }
55 static constexpr unsigned min( ) { return 0; }
56 static constexpr unsigned max( ) { return ~unsigned(0); }
57 typedef unsigned result_type;
58
59protected:
60 struct array2x64 { uint64_t v[2]; };
61 struct array1x64 { uint64_t v[1]; };
62
63 array1x64 bumpkey( array1x64 key )
64 {
65 key.v[0] += 0x9E3779B97F4A7C15UL;
66 return key;
67 }
68
69 uint64_t mulhilo64( uint64_t a, uint64_t b, uint64_t* hip )
70 {
71 __uint128_t product = __uint128_t(a) * __uint128_t(b);
72 *hip= uint64_t(product >> 64);
73 return uint64_t(product);
74 }
75
76 array2x64 round( array2x64 ctr, array1x64 key )
77 {
78 uint64_t hi;
79 uint64_t lo = mulhilo64(0xD2B74407B1CE6E93UL, ctr.v[0], &hi);
80
81 return { { hi^key.v[0]^ctr.v[1], lo } };
82 }
83
84 template< int R >
85 array2x64 philox2x64( array2x64 ctr, array1x64 key )
86 {
87 if(R>0) { ctr = round(ctr, key); }
88 if(R>1) { key = bumpkey(key); ctr = round(ctr, key); }
89 if(R>2) { key = bumpkey(key); ctr = round(ctr, key); }
90 if(R>3) { key = bumpkey(key); ctr = round(ctr, key); }
91 if(R>4) { key = bumpkey(key); ctr = round(ctr, key); }
92 if(R>5) { key = bumpkey(key); ctr = round(ctr, key); }
93 if(R>6) { key = bumpkey(key); ctr = round(ctr, key); }
94 if(R>7) { key = bumpkey(key); ctr = round(ctr, key); }
95 if(R>8) { key = bumpkey(key); ctr = round(ctr, key); }
96 if(R>9) { key = bumpkey(key); ctr = round(ctr, key); }
97 if(R>10) { key = bumpkey(key); ctr = round(ctr, key); }
98 if(R>11) { key = bumpkey(key); ctr = round(ctr, key); }
99 if(R>12) { key = bumpkey(key); ctr = round(ctr, key); }
100 if(R>13) { key = bumpkey(key); ctr = round(ctr, key); }
101 if(R>14) { key = bumpkey(key); ctr = round(ctr, key); }
102 if(R>15) { key = bumpkey(key); ctr = round(ctr, key); }
103
104 return ctr;
105 }
106
107 array2x64 ctr;
108 array1x64 key;
109};
110
111
112// philox 2x32 6 rounds
113// rewritten from https://github.com/DEShawResearch/random123/blob/main/include/Random123/philox.h
114struct Philox32
115{
116 Philox32( ) : n(), key() { seed64(0xcf492074862957a3); }
117 Philox32( const uint64_t s ) : n(), key() { seed(s); }
118 Philox32( const unsigned hi, const unsigned lo= 0x40ba6a95 ) : n(), key() { seed(hi, lo); }
119
120 void seed( const unsigned hi, const unsigned lo= 0x40ba6a95 ) { seed64(uint64_t(hi) << 32 | uint64_t(lo)); }
121 void seed64( const uint64_t s )
122 {
123 n= 0;
124 key= { { unsigned(s >> 32) } };
125 }
126
127 Philox32& index( const uint64_t i )
128 {
129 n= i;
130 return *this;
131 }
132
133 unsigned sample( )
134 {
135 n++;
136 array2x32 ctr= { { unsigned(n >>32), unsigned(n) } };
137 return philox2x32<6>(ctr, key).v[0]; // 6 rounds
138 //~ return philox2x32<10>(ctr, key).v[0]; // 10 rounds
139 }
140
141 unsigned sample_range( const unsigned range )
142 {
143 // Efficiently Generating a Number in a Range
144 // cf http://www.pcg-random.org/posts/bounded-rands.html
145 unsigned divisor= ((-range) / range) + 1; // (2^32) / range
146 if(divisor == 0) return 0;
147
148 while(true)
149 {
150 unsigned x= sample() / divisor;
151 if(x < range) return x;
152 }
153 }
154
155 // c++ interface
156 unsigned operator() ( ) { return sample(); }
157 static constexpr unsigned min( ) { return 0; }
158 static constexpr unsigned max( ) { return ~unsigned(0); }
159 typedef unsigned result_type;
160
161protected:
162 struct array2x32 { unsigned v[2]; };
163 struct array1x32 { unsigned v[1]; };
164
165 array1x32 bumpkey( array1x32 key )
166 {
167 key.v[0] += 0x9E3779B9;
168 return key;
169 }
170
171 unsigned mulhilo32( unsigned a, unsigned b, unsigned* hip )
172 {
173 uint64_t product = uint64_t(a) * uint64_t(b);
174 *hip= unsigned(product >> 32);
175 return unsigned(product);
176 }
177
178 array2x32 round( array2x32 ctr, array1x32 key )
179 {
180 uint32_t hi, lo= mulhilo32(0xd256d193, ctr.v[0], &hi);
181
182 return { { hi^key.v[0]^ctr.v[1], lo } };
183 }
184
185 template< int R >
186 array2x32 philox2x32( array2x32 ctr, array1x32 key )
187 {
188 if(R>0) { ctr = round(ctr, key); }
189 if(R>1) { key = bumpkey(key); ctr = round(ctr, key); }
190 if(R>2) { key = bumpkey(key); ctr = round(ctr, key); }
191 if(R>3) { key = bumpkey(key); ctr = round(ctr, key); }
192 if(R>4) { key = bumpkey(key); ctr = round(ctr, key); }
193 if(R>5) { key = bumpkey(key); ctr = round(ctr, key); }
194 if(R>6) { key = bumpkey(key); ctr = round(ctr, key); }
195 if(R>7) { key = bumpkey(key); ctr = round(ctr, key); }
196 if(R>8) { key = bumpkey(key); ctr = round(ctr, key); }
197 if(R>9) { key = bumpkey(key); ctr = round(ctr, key); }
198 if(R>10) { key = bumpkey(key); ctr = round(ctr, key); }
199 if(R>11) { key = bumpkey(key); ctr = round(ctr, key); }
200 if(R>12) { key = bumpkey(key); ctr = round(ctr, key); }
201 if(R>13) { key = bumpkey(key); ctr = round(ctr, key); }
202 if(R>14) { key = bumpkey(key); ctr = round(ctr, key); }
203 if(R>15) { key = bumpkey(key); ctr = round(ctr, key); }
204
205 return ctr;
206 }
207
208 uint64_t n;
209 array1x32 key;
210};
211
212
213// threefry 2x32 13 rounds
214// rewritten from https://github.com/DEShawResearch/random123/blob/main/include/Random123/threefry.h
215struct Threefry
216{
217 Threefry( ) : n(), key() { seed64(0xcf492074862957a3); }
218 Threefry( const uint64_t s ) : n(), key() { seed64(s); }
219 Threefry( const unsigned hi, const unsigned lo= 0xacac820c ) : n(), key() { seed(hi, lo); }
220
221 void seed( const unsigned hi, const unsigned lo= 0xacac820c ) { seed64( uint64_t(hi) << 32 | uint64_t(lo) ); }
222 void seed64( const uint64_t s )
223 {
224 n= 0;
225 key= { unsigned(s >> 32), unsigned(s) };
226 }
227
228 Threefry& index( const uint64_t i )
229 {
230 n= i;
231 return *this;
232 }
233
234 unsigned sample( )
235 {
236 n++;
237 array2x32 ctr= { { unsigned(n >>32), unsigned(n) } };
238 return threefry2x32<13>(ctr, key).v[0];
239 }
240
241 unsigned sample_range( const unsigned range )
242 {
243 // Efficiently Generating a Number in a Range
244 // cf http://www.pcg-random.org/posts/bounded-rands.html
245 unsigned divisor= ((-range) / range) + 1; // (2^32) / range
246 if(divisor == 0) return 0;
247
248 while(true)
249 {
250 unsigned x= sample() / divisor;
251 if(x < range) return x;
252 }
253 }
254
255 // c++ interface
256 unsigned operator() ( ) { return sample(); }
257 static constexpr unsigned min( ) { return 0; }
258 static constexpr unsigned max( ) { return ~unsigned(0); }
259 typedef unsigned result_type;
260
261 struct array2x32 { unsigned v[2]; };
262
263private:
264 static constexpr unsigned R_32x2_0_0= 13;
265 static constexpr unsigned R_32x2_1_0= 15;
266 static constexpr unsigned R_32x2_2_0= 26;
267 static constexpr unsigned R_32x2_3_0= 6;
268 static constexpr unsigned R_32x2_4_0= 17;
269 static constexpr unsigned R_32x2_5_0= 29;
270 static constexpr unsigned R_32x2_6_0= 16;
271 static constexpr unsigned R_32x2_7_0= 24;
272
273 unsigned rotl32( const unsigned x, const unsigned n ) { return (x << (n & 31)) | (x >> ((32 - n) & 31)); }
274
275 template<int R >
276 array2x32 threefry2x32( array2x32 ctr, array2x32 k )
277 {
278 array2x32 x;
279 unsigned ks[3]= { 0, 0, 0x1BD11BDA };
280 for(int i= 0; i < 2; i++)
281 {
282 ks[i] = k.v[i];
283 x.v[i] = ctr.v[i];
284 ks[2] ^= k.v[i];
285 }
286
287 x.v[0] += ks[0];
288 x.v[1] += ks[1];
289 if(R>0) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_0_0); x.v[1] ^= x.v[0]; }
290 if(R>1) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_1_0); x.v[1] ^= x.v[0]; }
291 if(R>2) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_2_0); x.v[1] ^= x.v[0]; }
292 if(R>3) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_3_0); x.v[1] ^= x.v[0]; }
293 if(R>3) { x.v[0] += ks[1]; x.v[1] += ks[2]; x.v[1] += 1; }
294 if(R>4) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_4_0); x.v[1] ^= x.v[0]; }
295 if(R>5) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_5_0); x.v[1] ^= x.v[0]; }
296 if(R>6) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_6_0); x.v[1] ^= x.v[0]; }
297 if(R>7) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_7_0); x.v[1] ^= x.v[0]; }
298 if(R>7) { x.v[0] += ks[2]; x.v[1] += ks[0]; x.v[1] += 2; }
299 if(R>8) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_0_0); x.v[1] ^= x.v[0]; }
300 if(R>9) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_1_0); x.v[1] ^= x.v[0]; }
301 if(R>10) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_2_0); x.v[1] ^= x.v[0]; }
302 if(R>11) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_3_0); x.v[1] ^= x.v[0]; }
303 if(R>11) { x.v[0] += ks[0]; x.v[1] += ks[1]; x.v[1] += 3; }
304 if(R>12) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_4_0); x.v[1] ^= x.v[0]; }
305 if(R>13) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_5_0); x.v[1] ^= x.v[0]; }
306 if(R>14) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_6_0); x.v[1] ^= x.v[0]; }
307 if(R>15) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_7_0); x.v[1] ^= x.v[0]; }
308 if(R>15) { x.v[0] += ks[1]; x.v[1] += ks[2]; x.v[1] += 4; }
309 if(R>16) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_0_0); x.v[1] ^= x.v[0]; }
310 if(R>17) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_1_0); x.v[1] ^= x.v[0]; }
311 if(R>18) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_2_0); x.v[1] ^= x.v[0]; }
312 if(R>19) { x.v[0] += x.v[1]; x.v[1] = rotl32(x.v[1], R_32x2_3_0); x.v[1] ^= x.v[0]; }
313 if(R>19) { x.v[0] += ks[2]; x.v[1] += ks[0]; x.v[1] += 5; }
314
315 return x;
316 }
317
318 uint64_t n;
319 array2x32 key;
320};
321
322
323#ifdef __AES__
324#include <x86intrin.h>
325
326// ars 1x128 5 rounds
327// rewritten from https://github.com/DEShawResearch/random123/blob/main/include/Random123/ars.h
328// utilise les instructions AES, compiler avec g++ -maes ou g++ -march=native
329struct Ars
330{
331 Ars( ) : ctr(), key() { seed64(0xcf492074862957a3); }
332 Ars( const uint64_t s ) : ctr(), key() { seed64(s); }
333 Ars( const unsigned hi, const unsigned lo= 0xacac820c ) : ctr(), key() { seed(hi, lo); }
334
335 void seed( const unsigned hi, const unsigned lo= 0xacac820c ) { seed64( uint64_t(hi) << 32 | uint64_t(lo) ); }
336 void seed64( const uint64_t s )
337 {
338 ctr= { 0, 0x838ca1328d07d3ba };
339 key= { s, 0xcf492074862957a3 };
340 }
341
342 Ars& index( const uint64_t i )
343 {
344 ctr= { i, 0x838ca1328d07d3ba };
345 return *this;
346 }
347
348 unsigned sample( )
349 {
350 array1x128 one= { 1, 0 };
351 ctr= _mm_add_epi64(ctr, one);
352
353 array1x128 out= ars1x128<5>(ctr, key);
354 return _mm_extract_epi32(out.v, 3);
355 }
356
357 unsigned sample_range( const unsigned range )
358 {
359 // Efficiently Generating a Number in a Range
360 // cf http://www.pcg-random.org/posts/bounded-rands.html
361 unsigned divisor= ((-range) / range) + 1; // (2^32) / range
362 if(divisor == 0) return 0;
363
364 while(true)
365 {
366 unsigned x= sample() / divisor;
367 if(x < range) return x;
368 }
369 }
370
371 // c++ interface
372 unsigned operator() ( ) { return sample(); }
373 static constexpr unsigned min( ) { return 0; }
374 static constexpr unsigned max( ) { return ~unsigned(0); }
375 typedef unsigned result_type;
376
377protected:
378 struct array1x128
379 {
380 __m128i v;
381
382 array1x128( ) = default;
383 array1x128( const uint64_t hi, const uint64_t lo ) { v= _mm_set_epi64x(hi, lo); }
384 array1x128( const __m128i x ) { v= x; }
385 operator __m128i( ) const { return v; }
386 };
387
388 template< int R >
389 array1x128 ars1x128( array1x128 ctr, array1x128 key )
390 {
391 __m128i kweyl = _mm_set_epi64x(0xBB67AE8584CAA73BUL, 0x9E3779B97F4A7C15UL);
392 __m128i k = key.v;
393 __m128i v = _mm_xor_si128(ctr.v, k);
394
395 if(R>1) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
396 if(R>2) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
397 if(R>3) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
398 if(R>4) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
399 if(R>5) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
400 if(R>6) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
401 if(R>7) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
402 if(R>8) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
403 if(R>9) { k = _mm_add_epi64(k, kweyl); v = _mm_aesenc_si128(v, k); }
404
405 k = _mm_add_epi64(k, kweyl);
406 v = _mm_aesenclast_si128(v, k);
407 return v;
408 }
409
410 array1x128 ctr;
411 array1x128 key;
412};
413#else
414struct Ars {}; // compiler avec g++ -maes
415#endif
416
417
418#endif
Point max(const Point &a, const Point &b)
renvoie la plus grande composante de chaque point { max(a.x, b.x), max(a.y, b.y), max(a....
Definition vec.cpp:35
Point min(const Point &a, const Point &b)
renvoie la plus petite composante de chaque point { min(a.x, b.x), min(a.y, b.y), min(a....
Definition vec.cpp:30
Definition rand123.h:414