C++ - How to program functions that work on temporaries and don't allocate

86 Views Asked by At

I am trying to implement a fixed-size array class to represent small vectors. I want the typical vector operations to be defined, such as multiplication by a scalar and addition of another vector. The problem is that I cannot get the same performance from these two pieces of code:

// Variant 1: explicit element-wise loop that writes each result straight into vout.
for (size_t i = 0; i < N; ++i)
    vout[i] = (k[0][i] + 2*k[1][i] + 2*k[2][i] + k[3][i])/6;
// vs
// Variant 2: the same weighted combination expressed with whole-vector operators.
vout = (k[0] + 2.0*k[1] + 2.0*k[2] + k[3])/6.0;

The reason is that the second option is creating more arrays in the process, while the first adds in place. I would like to know if there is a way to write a function that operates on temporaries (r-value references) so that a sum is performed over the temporary, without allocating an extra vector.

My current code:

/// Fixed-size numeric vector holding N elements of type T in an inline array.
///
/// Offers unchecked element access, in-place compound arithmetic, a fused
/// "add scaled vector" helper, and rvalue-qualified binary operators that
/// reuse the storage of a temporary left operand instead of creating a new
/// object.
template<class T, int N>
class SVec {
  public:
    // Both accessors take size_t. With the former int/size_t mix, indexing a
    // non-const SVec with a size_t was ambiguous: one overload matched the
    // object better, the other matched the index better.

    /// Mutable access to element i. The index is not range-checked.
    inline T& operator[](size_t i) { return mdata[i]; }
    /// Read-only access to element i. The index is not range-checked.
    inline const T& operator[](size_t i) const { return mdata[i]; }
    /// Pointer to the first element of the underlying array.
    inline T* data() { return mdata; }
    /// Pointer to the first element of the underlying array (read-only).
    inline T const* data() const { return mdata; }

    /// Adds rhs to this vector element by element; returns *this.
    inline SVec& operator+=(const SVec& rhs) {
        for (size_t i = 0; i < kCount; ++i) mdata[i] += rhs.mdata[i];
        return *this;
    }
    /// Subtracts rhs from this vector element by element; returns *this.
    inline SVec& operator-=(const SVec& rhs) {
        for (size_t i = 0; i < kCount; ++i) mdata[i] -= rhs.mdata[i];
        return *this;
    }
    /// Multiplies every element by the scalar rhs; returns *this.
    inline SVec& operator*=(T rhs) {
        for (T& elem : mdata) elem *= rhs;
        return *this;
    }
    /// Divides every element by the scalar rhs; returns *this.
    inline SVec& operator/=(T rhs) {
        for (T& elem : mdata) elem /= rhs;
        return *this;
    }

    /// Adds x[i]*y to every element i of this vector; returns *this.
    inline SVec& fma(const SVec& x, T y) {
        for (size_t i = 0; i < kCount; ++i) mdata[i] += x.mdata[i]*y;
        return *this;
    }

    // The operators below are only selected when the left operand is an
    // rvalue. They modify that operand in place and hand back an rvalue
    // reference to it, so no additional SVec is created. The returned
    // reference refers to the left operand itself: it must not be kept
    // beyond the lifetime of that operand (for a temporary, the end of the
    // full expression).

    /// In-place sum on an rvalue left operand; returns an rvalue reference to it.
    inline SVec&& operator+(const SVec& rhs) && {
        return std::move(*this += rhs);
    }
    /// In-place scaling of an rvalue left operand; returns an rvalue reference to it.
    inline SVec&& operator*(T rhs) && {
        return std::move(*this *= rhs);
    }
    /// In-place division of an rvalue left operand; returns an rvalue reference to it.
    inline SVec&& operator/(T rhs) && {
        return std::move(*this /= rhs);
    }


  private:
    // Element count as an unsigned value, so loop bounds compare like with like.
    static constexpr size_t kCount = static_cast<size_t>(N);

    T mdata[N];
};

/// Element-wise sum of two vectors.
///
/// lhs is taken by value, so it already is the working copy: rhs is added
/// into it and it is returned.
template<typename T, int N>
inline SVec<T, N> operator+(SVec<T, N> lhs, const SVec<T, N>& rhs) {
    lhs += rhs;
    // Returning the parameter by name (rather than the SVec& produced by +=)
    // lets the return value be move-constructed instead of copy-constructed.
    return lhs;
}
// Tried:
// template<typename T, int N>
// inline SVec<T, N>&& operator+(SVec<T, N>&& lhs, const SVec<T, N>& rhs) {
//     lhs += rhs;
//     return lhs;
// }
// template<typename T, int N>
// inline SVec<T, N>&& operator+(const SVec<T, N>& rhs, SVec<T, N>&& lhs) {
//     lhs += rhs;
//     return lhs;
// }
// template<typename T, int N>
// inline SVec<T, N>&& operator+(SVec<T, N>&& lhs, SVec<T, N>&& rhs) {
//     lhs += rhs;
//     return lhs;
// }
/// Element-wise difference of two vectors (lhs - rhs).
///
/// lhs is taken by value, so it already is the working copy: rhs is
/// subtracted from it and it is returned.
template<typename T, int N>
inline SVec<T, N> operator-(SVec<T, N> lhs, const SVec<T, N>& rhs) {
    lhs -= rhs;
    // Returning the parameter by name (rather than the SVec& produced by -=)
    // lets the return value be move-constructed instead of copy-constructed.
    return lhs;
}
/// Vector times scalar: every element of lhs multiplied by rhs.
///
/// lhs is taken by value, so it already is the working copy.
template<typename T, int N>
inline SVec<T, N> operator*(SVec<T, N> lhs, T rhs) {
    lhs *= rhs;
    // Returning the parameter by name (rather than the SVec& produced by *=)
    // lets the return value be move-constructed instead of copy-constructed.
    return lhs;
}
tem
plate<typename T, int N>
inline SVec<T, N> operator*(T rhs, SVec<T, N> lhs) {
    return lhs *= rhs;
}
/// Vector divided by scalar: every element of lhs divided by rhs.
///
/// lhs is taken by value, so it already is the working copy.
template<typename T, int N>
inline SVec<T, N> operator/(SVec<T, N> lhs, T rhs) {
    lhs /= rhs;
    // Returning the parameter by name (rather than the SVec& produced by /=)
    // lets the return value be move-constructed instead of copy-constructed.
    return lhs;
}
/// Scalar divided by vector, element-wise: result[i] = rhs / lhs[i].
///
/// Note the parameter naming: rhs is the scalar written on the left of `/`,
/// lhs is the vector written on the right. Division is not commutative, so
/// this overload must not forward to `lhs /= rhs` (that would compute
/// vector / scalar instead).
template<typename T, int N>
inline SVec<T, N> operator/(T rhs, SVec<T, N> lhs) {
    // lhs is a by-value copy, so it is overwritten in place and returned.
    T* elems = lhs.data();
    for (size_t i = 0; i < static_cast<size_t>(N); ++i) {
        elems[i] = rhs / elems[i];
    }
    return lhs;
}

/// Returns z + x*y, where y is a scalar and the product is taken element-wise.
///
/// z is taken by value, so it already is the working copy: it is updated
/// through the member fma and returned.
template<typename T, int N>
inline SVec<T, N> fma(const SVec<T, N>& x, T y, SVec<T, N> z) {
    z.fma(x, y);
    // Returning the parameter by name (rather than the SVec& produced by the
    // member fma) lets the return value be move-constructed instead of
    // copy-constructed.
    return z;
}

// Convenience alias: DoubleVec<N> is an SVec of N doubles.
template<int N>
using DoubleVec = SVec<double, N>;
1

There is 1 best solution below

2
Onur Onder On

If I understood you correctly, you are trying to return the very object that was modified, rather than a new copy. That cannot be done with a standalone operator+.

However, there is the concept of return value optimization (RVO), which your compiler may apply depending on the usage: if a function that returns by value immediately returns its result, the compiler may construct that result directly in its destination instead of storing it in a temporary first. See: Return value optimization.

I can see that your standalone operator+ only accepts operands of the same type, so you could also add a similar lvalue-qualified operator+(...) & as a class member that returns the object itself. However, this brings further problems, as shown below:

    // Lvalue-qualified member operator+: adds rhs into *this element by
    // element and returns *this, so the left operand itself is modified and
    // no new SVec is created.
    inline SVec& operator+(const SVec& rhs)& {
        for (size_t idx = 0; idx < N; ++idx) {
            mdata[idx] += rhs.mdata[idx];
        }
        return *this;
    }

This does not create a new copy, but it does not behave correctly in a statement like the following, because evaluating s1 + s2 modifies s1 itself:

    SVec<int, 2> s1;
    SVec<int, 2> s2;
    SVec<int, 2> s3;
    // When the lvalue-qualified member operator+ from the preceding snippet is
    // the overload used, this adds s2 into s1 and then copies s1 into s3:
    // s3 receives the sum, but s1 has been changed as a side effect.
    s3 = s1 + s2;