10#ifndef RANLUXPP_MULMOD_H
11#define RANLUXPP_MULMOD_H
22static void multiply9x9(
const uint64_t *in1,
const uint64_t *in2,
25 unsigned nextCarry = 0;
27#if defined(__clang__) || defined(__INTEL_COMPILER)
29#elif defined(__GNUC__) && __GNUC__ >= 8
33 for (
int i = 0; i < 18; i++) {
34 uint64_t current = next;
35 unsigned carry = nextCarry;
40#if defined(__clang__) || defined(__INTEL_COMPILER)
42#elif defined(__GNUC__) && __GNUC__ >= 8
46 for (
int j = 0; j < 9; j++) {
51 uint64_t fac1 = in1[j];
52 uint64_t fac2 = in2[k];
53#if defined(__SIZEOF_INT128__) && !defined(CLHEP_NO_INT128)
55#pragma GCC diagnostic push
56#pragma GCC diagnostic ignored "-Wpedantic"
58 unsigned __int128 prod = fac1;
61 uint64_t upper = prod >> 64;
62 uint64_t lower =
static_cast<uint64_t
>(prod);
64#pragma GCC diagnostic pop
67 uint64_t upper1 = fac1 >> 32;
68 uint64_t lower1 =
static_cast<uint32_t
>(fac1);
70 uint64_t upper2 = fac2 >> 32;
71 uint64_t lower2 =
static_cast<uint32_t
>(fac2);
75 uint64_t upper = upper1 * upper2;
76 uint64_t middle1 = upper1 * lower2;
77 uint64_t middle2 = lower1 * upper2;
78 uint64_t lower = lower1 * lower2;
83 uint64_t middle =
add_overflow(middle1, middle2, overflow);
94 uint64_t overflow_add = overflow * (uint64_t(1) << 32);
99 upper += overflow_add;
101 uint64_t middle_upper = middle >> 32;
102 uint64_t middle_lower = middle << 32;
119 upper += middle_upper;
123 current =
add_carry(current, lower, carry);
126 next =
add_carry(next, upper, nextCarry);
129 next =
add_carry(next, carry, nextCarry);
143static void mod_m(
const uint64_t *mul, uint64_t *out) {
146 for (
int i = 0; i < 9; i++) {
174 uint64_t c_unsigned =
static_cast<uint64_t
>(c);
177 int64_t t2 = t0 - (c_unsigned << 48);
181 int64_t t1 = t2 >> 48;
187 uint64_t out_0 =
sub_carry(r_0, c, carry);
190 for (
int i = 1; i < 3; i++) {
194 uint64_t out_i =
sub_carry(r_i, t0, carry);
201 uint64_t out_3 =
sub_carry(r_3, t2, carry);
204 for (
int i = 4; i < 9; i++) {
208 uint64_t out_i =
sub_carry(r_i, t1, carry);
219static void mulmod(
const uint64_t *in1, uint64_t *inout) {
220 uint64_t mul[2 * 9] = {0};
232static void powermod(
const uint64_t *base, uint64_t *res, uint64_t
n) {
233 uint64_t
fac[9] = {0};
236 for (
int i = 1; i < 9; i++) {
241 uint64_t mul[18] = {0};
static const G4double fac
static uint64_t sub_overflow(uint64_t a, uint64_t b, unsigned &overflow)
Compute a - b and set overflow accordingly.
static uint64_t sub_carry(uint64_t a, uint64_t b, unsigned &carry)
Compute a - b and increment carry if there was an overflow.
static uint64_t add_carry(uint64_t a, uint64_t b, unsigned &carry)
Compute a + b and increment carry if there was an overflow.
static int64_t compute_r(const uint64_t *upper, uint64_t *r)
static uint64_t add_overflow(uint64_t a, uint64_t b, unsigned &overflow)
Compute a + b and set overflow accordingly.
static void mulmod(const uint64_t *in1, uint64_t *inout)
static void powermod(const uint64_t *base, uint64_t *res, uint64_t n)
static void mod_m(const uint64_t *mul, uint64_t *out)
static void multiply9x9(const uint64_t *in1, const uint64_t *in2, uint64_t *out)