I'm practicing with 256-bit AVX SIMD instructions and I'm trying to compute the ternary (a op b) ? x : y.
I have something that almost works, but not quite.
EDIT: Code below works. I got a nice speed-up by explicitly vectorizing the ternary operation with this sequence of operations inside a tight loop. Hopefully someone else finds this information useful.
#include <immintrin.h>

#include <cstdlib>
#include <iostream>
// Demonstrates a vectorized ternary `(a < b) ? x : y` over eight floats with
// AVX: a lane-wise compare produces a mask, and a blend then selects between
// the two candidate vectors according to that mask.
//
// Returns EXIT_SUCCESS after printing the eight selected values, one per line.
int main() {
    // _mm256_store_ps is the aligned store, so the destination must sit on a
    // 32-byte boundary. alignas is the standard spelling of the alignment
    // request (instead of the compiler-specific __attribute__ form).
    alignas(32) float store[8] = {};

    const __m256 y = _mm256_set1_ps(10.f);   // y = [10,...,10]   -> chosen where the test is false
    const __m256 x = _mm256_set1_ps(100.f);  // x = [100,...,100] -> chosen where the test is true
    const __m256 a = _mm256_set1_ps(0.f);    // a = [0,...,0]
    const __m256 b = _mm256_setr_ps(0.f, 1.f, 1.f, 1.f, 0.f, 1.f, 1.f, 0.f);  // b = [0,1,1,1,0,1,1,0]

    // Lane-wise (a < b). _CMP_LT_OQ is the ordered, non-signaling less-than
    // predicate.
    const __m256 mask1 = _mm256_cmp_ps(a, b, _CMP_LT_OQ);
    // mask1 is now = [false, true, true, true, false, true, true, false]

    // Blend: for each lane take x where the mask is true, otherwise y.
    // Note the operand order: the "false" vector comes first.
    const __m256 res = _mm256_blendv_ps(y, x, mask1);

    _mm256_store_ps(store, res);  // write the eight results back to memory

    // '\n' instead of std::endl: same text, without a flush on every line.
    for (const float value : store)
        std::cout << value << '\n';
    /* prints
    10
    100
    100
    100
    10
    100
    100
    10 */
    return EXIT_SUCCESS;
}