I'm experimenting with C++20 coroutines, having in mind their potential use in embedded applications. However, I noticed that both GCC and Clang generate coroutine frames that are bigger than what would theoretically be required in an optimized build. In particular, for my code I measured the following (in the co-await column, "yes" means the source exactly as posted below, while "commented" means the same source with the co_await lines commented out):
| Compiler (-O3) | co-await | coroutine1 | coroutine4 |
|---|---|---|---|
| GCC 13.2 x86-64 | yes | 72 bytes | 144 bytes |
| clang 17.0.1 x86-64 | yes | 72 bytes | 136 bytes |
| GCC 13.2 x86-64 | commented | 80 bytes | 136 bytes |
| clang 17.0.1 x86-64 | commented | 32 bytes | 32 bytes |
I expected all allocations to be similar to the ones in the last row, if not even lower (the buffer is not used after co_await, so the only state of the function is the function pointer to fill and the value of ret). Am I doing something wrong, or is it just that coroutines are so new and so complex to handle that compilers do not yet optimize their frames well? Thanks to everybody who can point me to a solution, if one exists.
Alessandro
#include <stdint.h>

#include <coroutine>
#include <cstdlib>
#include <iostream>
#include <new>
// Forward declaration so that `coroutine` can name the promise type before
// the promise itself is defined.
struct promise;

// Number of bytes in every scratch buffer processed by fill() and sum().
static constexpr std::size_t bufSize{20};

// Copy of main()'s argc; fill() uses it as the base value of the bytes it
// writes.
static int numArgc{0};

// Running total of the bytes requested through promise::operator new.
// main() resets it to zero between the two measurements.
static std::size_t allocatedSize{0};
// Return type of the coroutine functions in this file. It is a plain
// std::coroutine_handle<promise>, so callers can resume(), query done() and
// reach the promise directly through the returned object.
struct coroutine : public std::coroutine_handle<promise>
{
    // The leading :: is required: inside this class the unqualified name
    // `promise` would find the promise() member inherited from
    // std::coroutine_handle instead of the promise struct.
    using promise_type = ::promise;
};
// Promise type used by `coroutine`. It stores the value passed to co_return
// in `sum` and routes the coroutine-frame allocation through malloc/free so
// that the requested frame size can be accumulated in allocatedSize.
struct promise
{
    // Creates the handle that is returned to the caller of the coroutine.
    coroutine get_return_object() { return {coroutine::from_promise(*this)}; }

    // The coroutine is created suspended; its body starts on the first resume().
    std::suspend_always initial_suspend() noexcept { return {}; }

    // The coroutine stays suspended after co_return, so the promise (and `sum`)
    // remains readable until the handle is destroyed.
    std::suspend_always final_suspend() noexcept { return {}; }

    // Receives the operand of co_return.
    void return_value(unsigned val) { sum = val; }

    // Exceptions escaping the coroutine body are ignored; `sum` is left as is.
    void unhandled_exception() {}

    // Allocates the coroutine frame and records its size.
    // This allocation function is not noexcept, so it must never return a null
    // pointer: failure is reported by throwing std::bad_alloc. Only successful
    // allocations are added to allocatedSize.
    void* operator new(std::size_t size)
    {
        void* const frame = std::malloc(size);
        if (frame == nullptr) {
            throw std::bad_alloc{};
        }
        allocatedSize += size;
        return frame;
    }

    // Releases a frame obtained from operator new. The size argument is not
    // needed by free(), hence the unnamed parameter.
    void operator delete(void* ptr, std::size_t /*size*/)
    {
        std::free(ptr);
    }

    // Value delivered by co_return; 0 until the coroutine has returned.
    unsigned sum = 0;
};
// Returns the sum of the first bufSize bytes stored at `buffer`.
// `buffer` must point to at least bufSize readable bytes.
uint32_t sum(uint8_t *buffer) {
    uint32_t total = 0;
    const uint8_t *const end = buffer + bufSize;
    for (const uint8_t *cursor = buffer; cursor != end; ++cursor) {
        total += *cursor;
    }
    return total;
}
// Writes bufSize bytes to `buffer`: the byte at position k receives
// numArgc + k, truncated to 8 bits.
// `buffer` must point to at least bufSize writable bytes.
void fill(uint8_t *buffer) {
    std::size_t index = 0;
    while (index < bufSize) {
        // The narrowing to uint8_t is intentional and matches plain assignment.
        buffer[index] = static_cast<uint8_t>(numArgc + index);
        ++index;
    }
}
// Coroutine with a single fill-and-sum round and one explicit co_await
// suspension point in its body.
// `fill` is a callback invoked with the scratch buffer (main() passes the
// file-level fill(), which writes bufSize bytes). Note that this parameter
// shadows the file-level function fill() inside the body.
// The accumulated total is handed to the promise through co_return
// (promise::return_value stores it in promise::sum).
coroutine coroutine1(void (*fill)(uint8_t *buffer)) {
unsigned ret=0;
{
// buf is local to this brace scope. The co_await below is the last statement
// of the scope, so buf is never read after the suspension point.
uint8_t buf[bufSize]={};
fill(buf);
ret+=sum(buf);
co_await std::suspend_always{}; // commented in "co-await: commented"
}
co_return ret;
}
// Coroutine that repeats the fill-and-sum round of coroutine1 four times,
// with one explicit co_await suspension point at the end of each round.
// `fill` is a callback invoked with each scratch buffer (main() passes the
// file-level fill(), which writes bufSize bytes). Note that this parameter
// shadows the file-level function fill() inside the body.
// The total over all four rounds is handed to the promise through co_return
// (promise::return_value stores it in promise::sum).
coroutine coroutine4(void (*fill)(uint8_t *buffer)) {
unsigned ret=0;
// Round 1. Each round declares its own buf inside its own brace scope; the
// scopes do not overlap, and the co_await is always the last statement of the
// scope, so no buf is read after its suspension point.
{
uint8_t buf[bufSize]={};
fill(buf);
ret+=sum(buf);
co_await std::suspend_always{}; // commented in "co-await: commented"
}
// Round 2.
{
uint8_t buf[bufSize]={};
fill(buf);
ret+=sum(buf);
co_await std::suspend_always{}; // commented in "co-await: commented"
}
// Round 3.
{
uint8_t buf[bufSize]={};
fill(buf);
ret+=sum(buf);
co_await std::suspend_always{}; // commented in "co-await: commented"
}
// Round 4.
{
uint8_t buf[bufSize]={};
fill(buf);
ret+=sum(buf);
co_await std::suspend_always{}; // commented in "co-await: commented"
}
co_return ret;
}
// Runs coroutine1 and coroutine4 to completion and prints, for each of them,
// the value delivered by co_return and the number of bytes requested through
// promise::operator new while it was being created.
int main(int argc, char *argv[])
{
    // fill() derives the buffer contents from argc, so the data is not a
    // compile-time constant.
    numArgc=argc;

    auto h1=coroutine1(fill);
    // The coroutine starts suspended; keep resuming until it reaches its final
    // suspension point.
    while(!h1.done()) {
        h1.resume();
    }
    std::cout<<"1 loop sum: "<<h1.promise().sum<<" Allocated:"<<allocatedSize<<" bytes\n";
    // final_suspend() suspends, so the frame is not released automatically:
    // destroy it explicitly, after the promise has been read, to avoid a leak.
    // NOTE(review): once the frame's lifetime is fully visible in this
    // function, a compiler is permitted to elide the heap allocation, in which
    // case promise::operator new is not called and the size reported above may
    // change (possibly to 0) - confirm the measurement still shows what you
    // intend on your toolchain.
    h1.destroy();

    // Start the second measurement from zero.
    allocatedSize=0;
    auto h4=coroutine4(fill);
    while(!h4.done()) {
        h4.resume();
    }
    std::cout<<"4 loops sum: "<<h4.promise().sum<<" Allocated:"<<allocatedSize<<" bytes\n";
    // Same as above: release the frame once its result has been printed.
    h4.destroy();
}