I tried to parallelize my block cipher, but it doesn't work correctly. The code runs, but the result differs from the non-parallel version. I'm a newbie to OpenMP. The code works fine without OpenMP. Here is the code, including every function. Also, what is the best solution if I only want to parallelize the loop and keep the saving to file sequential using OpenMP? I have a file with a large volume of information that I would like to split into equal chunks (16 bytes each) and encrypt using OpenMP.
/*
 * Substitutes every byte of a 16-byte block in place:
 * each x[i] is replaced by the Sbox entry indexed by its current value.
 */
void S(uint8_t* x) {
    uint8_t* const end = x + 16;
    for (uint8_t* p = x; p != end; ++p) {
        *p = Sbox[*p];
    }
}
/*
 * XORs a 16-byte block with a 16-byte key.
 *
 * x    - input block (16 bytes)
 * key  - key material (16 bytes)
 * temp - output buffer (16 bytes); callers also pass the same pointer as x
 *        to get an in-place update, which works because each byte is read
 *        before the byte at the same index is written.
 */
void X(uint8_t* x, uint8_t* key, uint8_t* temp) {
    int idx = 0;
    while (idx < 16) {
        uint8_t mixed = (uint8_t)(x[idx] ^ key[idx]);
        temp[idx] = mixed;
        ++idx;
    }
}
/*
 * Table-driven transform of a 16-byte block.
 *
 * For every output position i, XORs together the i-th byte of the sixteen
 * table entries LUT_1[t][pt[t]] (t = 0..15) and stores the result at
 * ct[15 - i], i.e. the output is written in reversed byte order.
 *
 * pt - input block (16 bytes)
 * ct - output block (16 bytes)
 */
void SL_1(unsigned char* pt, unsigned char* ct) {
    for (int i = 0; i < 16; i++) {
        unsigned char acc = 0;
        /* XOR is commutative, so the table order does not affect the result. */
        for (int t = 0; t < 16; t++) {
            acc ^= LUT_1[t][pt[t]].a[i];
        }
        ct[15 - i] = acc;
    }
}
/*
 * Decrypts one 16-byte block in place.
 *
 * pt   - block to process (16 bytes); overwritten with the result
 * K    - array of key pointers; only K[0] is used here
 * L1_K - array of key pointers; entries 1..9 are used here
 *
 * Sequence: S on the block, then nine SL_1 + X rounds using L1_K[9] down to
 * L1_K[1], then S1 and a final X with K[0].
 *
 * The scratch buffer lives on the stack: the previous heap allocation was
 * never released, leaking 16 bytes per call, and every call from a parallel
 * loop had to go through the allocator.
 */
void decrypt(unsigned char* pt, unsigned char** K, unsigned char** L1_K) {
    unsigned char temp[16];

    S(pt);

    /* Rounds 9..2 come in pairs: pt -> temp with key r, temp -> pt with key r-1. */
    for (int r = 9; r > 1; r -= 2) {
        SL_1(pt, temp);
        X(temp, L1_K[r], temp);
        SL_1(temp, pt);
        X(pt, L1_K[r - 1], pt);
    }

    /* Last round writes the XOR result straight back into pt. */
    SL_1(pt, temp);
    X(temp, L1_K[1], pt);

    S1(pt);
    X(pt, K[0], pt);
}
/*
 * Splits a 128-bit value into 16 bytes, most significant byte first:
 * out[0] receives bits 127..120 and out[15] receives bits 7..0.
 *
 * out  - destination for the 16 bytes
 * data - 128-bit value to split (not modified)
 */
void divideByte(unsigned char out[16], bitset<128>& data)
{
    const bitset<128> low_byte(0xFF);
    for (int i = 0; i < 16; ++i)
    {
        const int shift = 8 * (15 - i);
        out[i] = static_cast<unsigned char>(((data >> shift) & low_byte).to_ulong());
    }
}
/*
 * Packs 16 bytes into a 128-bit value, most significant byte first:
 * in[0] ends up in bits 127..120 and in[15] in bits 7..0.
 *
 * in - the 16 source bytes
 * Returns the assembled 128-bit value.
 */
bitset<128> mergeByte(unsigned char in[16])
{
    bitset<128> packed;
    for (int i = 0; i < 16; ++i)
    {
        /* Make room for the next byte, then append it at the low end. */
        packed <<= 8;
        packed |= bitset<128>(in[i]);
    }
    return packed;
}
/*
 * Reads the input as consecutive 16-byte blocks, decrypts the blocks in
 * parallel with OpenMP, then writes them out sequentially in the original
 * order.
 */
int main() {
    /* Only whole 16-byte blocks are processed. */
    long long count = file_size_2_d / 16;
    bitset<128>* data2 = new bitset<128>[count];

    for (long long i = 0; i < count; i++)
    {
        input_d.read((char*)&data2[i], sizeof(data2[i]));
    }

    /* Use one thread per available processor. */
    omp_set_num_threads(omp_get_num_procs());

    /*
     * Each iteration touches only its own data2[i], and the work buffer is
     * declared inside the loop body so every thread gets its own copy.
     * NOTE(review): results match the sequential run only if decrypt() and
     * everything it calls write no shared state — S1 and the key/table
     * globals are not visible here, confirm they are read-only.
     */
    #pragma omp parallel for schedule(static)
    for (long long i = 0; i < count; i++)
    {
        unsigned char plain[16];
        divideByte(plain, data2[i]);
        decrypt(plain, K, L1_K);
        data2[i] = mergeByte(plain);
    }

    /* Sequential write; the size matches the one used when reading. */
    for (long long i = 0; i < count; i++)
    {
        output_d.write((char*)&data2[i], sizeof(data2[i]));
    }

    delete[] data2;
    return 0;
}