Program Listing for File addition.cu
↰ Return to documentation for file (src/lib/kernel/addition.cu)
// Copyright 2024-2026 Alişah Özcan
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
// Developer: Alişah Özcan
#include <heongpu/kernel/addition.cuh>
namespace heongpu
{
__global__ void addition(Data64* in1, Data64* in2, Data64* out,
Modulus64* modulus, int n_power)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x; // ring size
int idy = blockIdx.y; // rns count
int idz = blockIdx.z; // cipher count
int location = idx + (idy << n_power) + ((gridDim.y * idz) << n_power);
out[location] =
OPERATOR_GPU_64::add(in1[location], in2[location], modulus[idy]);
}
__global__ void substraction(Data64* in1, Data64* in2, Data64* out,
Modulus64* modulus, int n_power)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x; // ring size
int idy = blockIdx.y; // rns count
int idz = blockIdx.z; // cipher count
int location = idx + (idy << n_power) + ((gridDim.y * idz) << n_power);
out[location] =
OPERATOR_GPU_64::sub(in1[location], in2[location], modulus[idy]);
}
__global__ void negation(Data64* in1, Data64* out, Modulus64* modulus,
int n_power)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x; // ring size
int idy = blockIdx.y; // rns count
int idz = blockIdx.z; // cipher count
int location = idx + (idy << n_power) + ((gridDim.y * idz) << n_power);
Data64 zero = 0;
out[location] = OPERATOR_GPU_64::sub(zero, in1[location], modulus[idy]);
}
__global__ void addition_plain_bfv_poly(Data64* cipher, Data64* plain,
Data64* output, Modulus64* modulus,
Modulus64 plain_mod, Data64 Q_mod_t,
Data64 upper_threshold,
Data64* coeffdiv_plain, int n_power)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x; // ring size
int block_y = blockIdx.y; // rns count
int block_z = blockIdx.z; // cipher size
int location =
idx + (block_y << n_power) + ((gridDim.y * block_z) << n_power);
if (block_z == 0)
{
Data64 message = plain[idx];
Data64 ciphertext = cipher[location];
Data64 fix = message * Q_mod_t;
fix = fix + upper_threshold;
fix = int(fix / plain_mod.value);
Data64 result = OPERATOR_GPU_64::mult(
message, coeffdiv_plain[block_y], modulus[block_y]);
result = OPERATOR_GPU_64::add(result, fix, modulus[block_y]);
result = OPERATOR_GPU_64::add(result, ciphertext, modulus[block_y]);
output[location] = result;
}
else
{
Data64 ciphertext = cipher[location];
output[location] = ciphertext;
}
}
__global__ void addition_plain_bfv_poly_inplace(
Data64* cipher, Data64* plain, Data64* output, Modulus64* modulus,
Modulus64 plain_mod, Data64 Q_mod_t, Data64 upper_threshold,
Data64* coeffdiv_plain, int n_power)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x; // ring size
int block_y = blockIdx.y; // rns count
int location = idx + (block_y << n_power);
Data64 message = plain[idx];
Data64 ciphertext = cipher[location];
Data64 fix = message * Q_mod_t;
fix = fix + upper_threshold;
fix = int(fix / plain_mod.value);
Data64 result = OPERATOR_GPU_64::mult(message, coeffdiv_plain[block_y],
modulus[block_y]);
result = OPERATOR_GPU_64::add(result, fix, modulus[block_y]);
result = OPERATOR_GPU_64::add(result, ciphertext, modulus[block_y]);
output[location] = result;
}
__global__ void
substraction_plain_bfv_poly(Data64* cipher, Data64* plain, Data64* output,
Modulus64* modulus, Modulus64 plain_mod,
Data64 Q_mod_t, Data64 upper_threshold,
Data64* coeffdiv_plain, int n_power)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x; // ring size
int block_y = blockIdx.y; // rns count
int block_z = blockIdx.z; // cipher size
int location =
idx + (block_y << n_power) + ((gridDim.y * block_z) << n_power);
if (block_z == 0)
{
Data64 message = plain[idx];
Data64 ciphertext = cipher[location];
Data64 fix = message * Q_mod_t;
fix = fix + upper_threshold;
fix = int(fix / plain_mod.value);
Data64 result = OPERATOR_GPU_64::mult(
message, coeffdiv_plain[block_y], modulus[block_y]);
result = OPERATOR_GPU_64::add(result, fix, modulus[block_y]);
result = OPERATOR_GPU_64::sub(ciphertext, result, modulus[block_y]);
output[location] = result;
}
else
{
Data64 ciphertext = cipher[location];
output[location] = ciphertext;
}
}
__global__ void substraction_plain_bfv_poly_inplace(
Data64* cipher, Data64* plain, Data64* output, Modulus64* modulus,
Modulus64 plain_mod, Data64 Q_mod_t, Data64 upper_threshold,
Data64* coeffdiv_plain, int n_power)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x; // ring size
int block_y = blockIdx.y; // rns count
int location = idx + (block_y << n_power);
Data64 message = plain[idx];
Data64 ciphertext = cipher[location];
Data64 fix = message * Q_mod_t;
fix = fix + upper_threshold;
fix = int(fix / plain_mod.value);
Data64 result = OPERATOR_GPU_64::mult(message, coeffdiv_plain[block_y],
modulus[block_y]);
result = OPERATOR_GPU_64::add(result, fix, modulus[block_y]);
result = OPERATOR_GPU_64::sub(ciphertext, result, modulus[block_y]);
output[location] = result;
}
__global__ void addition_plain_ckks_poly(Data64* in1, Data64* in2,
Data64* out, Modulus64* modulus,
int n_power)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x; // ring size
int idy = blockIdx.y; // rns count
int idz = blockIdx.z; // cipher count
int location = idx + (idy << n_power) + ((gridDim.y * idz) << n_power);
if (idz == 0)
{
out[location] = OPERATOR_GPU_64::add(in1[location], in2[location],
modulus[idy]);
}
else
{
Data64 ciphertext = in1[location];
out[location] = ciphertext;
}
}
__global__ void substraction_plain_ckks_poly(Data64* in1, Data64* in2,
Data64* out,
Modulus64* modulus,
int n_power)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x; // ring size
int idy = blockIdx.y; // rns count
int idz = blockIdx.z; // cipher count
int location = idx + (idy << n_power) + ((gridDim.y * idz) << n_power);
if (idz == 0)
{
out[location] = OPERATOR_GPU_64::sub(in1[location], in2[location],
modulus[idy]);
}
else
{
out[location] = in1[location];
}
}
__global__ void addition_constant_plain_ckks_poly(Data64* in1, double in2,
Data64* out,
Modulus64* modulus,
double two_pow_64,
int n_power)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x; // ring size
int idy = blockIdx.y; // rns count
int idz = blockIdx.z; // cipher count
int location = idx + (idy << n_power) + ((gridDim.y * idz) << n_power);
if (idz == 0)
{
double message_r = in2;
double coeff_double = round(message_r);
bool is_negative = signbit(coeff_double);
coeff_double = fabs(coeff_double);
// Change Type
Data64 coeff[2] = {
static_cast<std::uint64_t>(fmod(coeff_double, two_pow_64)),
static_cast<std::uint64_t>(coeff_double / two_pow_64)};
Data64 pt;
if (is_negative)
{
pt = OPERATOR_GPU_64::reduce(coeff, modulus[idy]);
pt = OPERATOR_GPU_64::sub(modulus[idy].value, pt, modulus[idy]);
}
else
{
pt = OPERATOR_GPU_64::reduce(coeff, modulus[idy]);
}
out[location] =
OPERATOR_GPU_64::add(in1[location], pt, modulus[idy]);
}
else
{
Data64 ciphertext = in1[location];
out[location] = ciphertext;
}
}
__global__ void
substraction_constant_plain_ckks_poly(Data64* in1, double in2, Data64* out,
Modulus64* modulus, double two_pow_64,
int n_power)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x; // ring size
int idy = blockIdx.y; // rns count
int idz = blockIdx.z; // cipher count
int location = idx + (idy << n_power) + ((gridDim.y * idz) << n_power);
if (idz == 0)
{
double message_r = in2;
double coeff_double = round(message_r);
bool is_negative = signbit(coeff_double);
coeff_double = fabs(coeff_double);
// Change Type
Data64 coeff[2] = {
static_cast<std::uint64_t>(fmod(coeff_double, two_pow_64)),
static_cast<std::uint64_t>(coeff_double / two_pow_64)};
Data64 pt;
if (is_negative)
{
pt = OPERATOR_GPU_64::reduce(coeff, modulus[idy]);
pt = OPERATOR_GPU_64::sub(modulus[idy].value, pt, modulus[idy]);
}
else
{
pt = OPERATOR_GPU_64::reduce(coeff, modulus[idy]);
}
out[location] =
OPERATOR_GPU_64::sub(in1[location], pt, modulus[idy]);
}
else
{
out[location] = in1[location];
}
}
__global__ void set_zero_cipher_ckks_poly(Data64* in1, Modulus64* modulus,
int n_power)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x; // ring size
int idy = blockIdx.y; // rns count
int idz = blockIdx.z; // cipher count
int location = idx + (idy << n_power) + ((gridDim.y * idz) << n_power);
in1[location] = 0;
}
} // namespace heongpu