How do I force the GCC compiler to unroll loops?

354 Views Asked by At

I'm writing a C program and want a loop unrolled. I need to force the compiler to unroll a loop where the number of iterations is known at compile time.

I've tried compiling with "-O0 -funroll-all-loops", I've tried using #pragma GCC unroll, and I've tried using __attribute__((optimize("unroll-loops"))). None of these seems to unroll the following test function:


#define STR_LEN 12
int __attribute__((noinline)) func(char *input_str) {
    char* fixed_str = "fixed string";
    for (int i = 0; i < STR_LEN; i++) {
        if (fixed_str[i] != input_str[i]) {
            return -1;
        }
        if (i == STR_LEN - 1) {
            return 0;
        }
    }
    return 0;
}

I've compiled using "-S" to see the assembly code and the loop seems the same.

Edit: I used #pragma unroll and compiled with -O1 and it works.

1

There is 1 best solution below

22
Luis Colorado On

As has been pointed out to you in the comments, you are messing up the compilation options: at -O0 GCC does not run its optimization passes, so flags such as -funroll-all-loops have no effect. But you can never force loop unrolling (or any other optimization of some code) if the compiler decides that the loop cannot be unrolled. Your loop seems to be unrollable, but the requirement that the unrolled code must behave externally (as the user sees it) exactly as it does without the optimization can force the code to be kept as a loop.

BTW, you can try to unroll it yourself. Let's see what happens:

#define STR_LEN 12
int __attribute__((noinline)) func(char *input_str) {
    char* fixed_str = "fixed string";
    for (int i = 0; i < STR_LEN; i++) {
        if (fixed_str[i] != input_str[i]) {
            return -1;
        }
        if (i == STR_LEN - 1) {
            return 0;
        }
    }
    return 0;
}
  • in the first step, we unroll the loop as many times as necessary to eliminate the i loop control variable. I'll also substitute the value of the STR_LEN - 1 constant expression, so the ifs can be clearly eliminated in the next optimization pass. This is something we can do because the i variable is not modified inside the loop (note that just passing its address through a non-const pointer to a function would allow it to be modified inside that function, and would automatically prevent us from doing this step; so would simply declaring it volatile)
/*
 * Fully unrolled form of the loop: one copy of the loop body per value of i
 * (0..11), with i and STR_LEN - 1 replaced by their constant values.
 *
 * Returns -1 at the first character of input_str that differs from
 * "fixed string", and 0 if all 12 compared characters match.
 */
int __attribute__((noinline)) func(char *input_str) {
    char* fixed_str = "fixed string";
    /* i = 0 */
        if (fixed_str[0] != input_str[0]) {
            return -1;
        }
        /* Constant-false condition: this return can never execute. The same
         * holds for every `k == 11` test below with k < 11. */
        if (0 == 11) {
            return 0;
        }
    /* i = 1 */
        if (fixed_str[1] != input_str[1]) {
            return -1;
        }
        if (1 == 11) {
            return 0;
        }
    /* i = 2 */
        if (fixed_str[2] != input_str[2]) {
            return -1;
        }
        if (2 == 11) {
            return 0;
        }
    /* i = 3 */
        if (fixed_str[3] != input_str[3]) {
            return -1;
        }
        if (3 == 11) {
            return 0;
        }
    /* i = 4 */
        if (fixed_str[4] != input_str[4]) {
            return -1;
        }
        if (4 == 11) {
            return 0;
        }
    /* i = 5 */
        if (fixed_str[5] != input_str[5]) {
            return -1;
        }
        if (5 == 11) {
            return 0;
        }
    /* i = 6 */
        if (fixed_str[6] != input_str[6]) {
            return -1;
        }
        if (6 == 11) {
            return 0;
        }
    /* i = 7 */
        if (fixed_str[7] != input_str[7]) {
            return -1;
        }
        if (7 == 11) {
            return 0;
        }
    /* i = 8 */
        if (fixed_str[8] != input_str[8]) {
            return -1;
        }
        if (8 == 11) {
            return 0;
        }
    /* i = 9 */
        if (fixed_str[9] != input_str[9]) {
            return -1;
        }
        if (9 == 11) {
            return 0;
        }
    /* i = 10 */
        if (fixed_str[10] != input_str[10]) {
            return -1;
        }
        if (10 == 11) {
            return 0;
        }
    /* i = 11 */
        if (fixed_str[11] != input_str[11]) {
            return -1;
        }
        /* Constant-true condition: when control gets here, this always returns 0. */
        if (11 == 11) {
            return 0;
        }
    /* Never reached, because the `11 == 11` test above always returns. */
    return 0;
}
  • now, we can eliminate all the if (i == STR_LEN - 1) statements whose condition evaluates to false, as there's no else part in those if statements.
/*
 * Unrolled comparison with the constant-false `k == 11` tests removed.
 *
 * Returns -1 at the first character of input_str that differs from
 * "fixed string", and 0 if all 12 compared characters match.
 */
int __attribute__((noinline)) func(char *input_str) {
    char* fixed_str = "fixed string";
    /* i = 0 */
        if (fixed_str[0] != input_str[0]) {
            return -1;
        }
    /* i = 1 */
        if (fixed_str[1] != input_str[1]) {
            return -1;
        }
    /* i = 2 */
        if (fixed_str[2] != input_str[2]) {
            return -1;
        }
    /* i = 3 */
        if (fixed_str[3] != input_str[3]) {
            return -1;
        }
    /* i = 4 */
        if (fixed_str[4] != input_str[4]) {
            return -1;
        }
    /* i = 5 */
        if (fixed_str[5] != input_str[5]) {
            return -1;
        }
    /* i = 6 */
        if (fixed_str[6] != input_str[6]) {
            return -1;
        }
    /* i = 7 */
        if (fixed_str[7] != input_str[7]) {
            return -1;
        }
    /* i = 8 */
        if (fixed_str[8] != input_str[8]) {
            return -1;
        }
    /* i = 9 */
        if (fixed_str[9] != input_str[9]) {
            return -1;
        }
    /* i = 10 */
        if (fixed_str[10] != input_str[10]) {
            return -1;
        }
    /* i = 11 */
        if (fixed_str[11] != input_str[11]) {
            return -1;
        }
        /* Always true: it returns the same value as the final return below,
         * so the test itself is redundant. */
        if (11 == 11) {
            return 0;
        }
    return 0;
}
  • now, we can eliminate redundant code: the last if (11 == 11) test is not necessary, as it results in the same code being executed (return 0;) as if it had not been there from the beginning.
  • we can organize all the tests into a single || expression, based on the fact that all of them result in the same code being executed (return -1) when the expression evaluates to true.
/*
 * Checks whether the first 12 characters of input_str equal "fixed string".
 *
 * All per-character mismatch tests are merged into a single || expression.
 * Because || short-circuits, characters are still examined in index order
 * and evaluation stops at the first mismatch.
 *
 * Returns 0 on a full match, -1 otherwise.
 */
int __attribute__((noinline)) func(char *input_str) {
    char *fixed_str = "fixed string";

    if (input_str[0] != fixed_str[0] ||
        input_str[1] != fixed_str[1] ||
        input_str[2] != fixed_str[2] ||
        input_str[3] != fixed_str[3] ||
        input_str[4] != fixed_str[4] ||
        input_str[5] != fixed_str[5] ||
        input_str[6] != fixed_str[6] ||
        input_str[7] != fixed_str[7] ||
        input_str[8] != fixed_str[8] ||
        input_str[9] != fixed_str[9] ||
        input_str[10] != fixed_str[10] ||
        input_str[11] != fixed_str[11]) {
        return -1;
    }
    return 0;
}
  • there's still another level of optimization, based on the fact that fixed_str is a constant string.
/*
 * Checks whether the first 12 characters of input_str spell "fixed string".
 *
 * The expected characters are written as literals, so no string object is
 * read at run time. The || chain short-circuits: characters are examined in
 * index order and evaluation stops at the first mismatch.
 *
 * Returns 0 on a full match, -1 otherwise.
 */
int __attribute__((noinline)) func(char *input_str) {
    if (input_str[0] != 'f' ||
        input_str[1] != 'i' ||
        input_str[2] != 'x' ||
        input_str[3] != 'e' ||
        input_str[4] != 'd' ||
        input_str[5] != ' ' ||
        input_str[6] != 's' ||
        input_str[7] != 't' ||
        input_str[8] != 'r' ||
        input_str[9] != 'i' ||
        input_str[10] != 'n' ||
        input_str[11] != 'g') {
        return -1;
    }
    return 0;
}

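You can also turn the || expression into an && expression by negating it (De Morgan's laws) and swapping the two return values. From that form you could probably reduce the twelve character tests to just three tests with integers (four characters per integer):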

/*
 * Checks whether the first 12 characters of input_str spell "fixed string".
 *
 * Written as a positive match: every character must be equal for the &&
 * chain to hold. The chain short-circuits, so evaluation stops at the first
 * character that differs.
 *
 * Returns 0 on a full match, -1 otherwise.
 */
int __attribute__((noinline)) func(char *input_str) {
    if (input_str[0] == 'f' &&
        input_str[1] == 'i' &&
        input_str[2] == 'x' &&
        input_str[3] == 'e' &&
        input_str[4] == 'd' &&
        input_str[5] == ' ' &&
        input_str[6] == 's' &&
        input_str[7] == 't' &&
        input_str[8] == 'r' &&
        input_str[9] == 'i' &&
        input_str[10] == 'n' &&
        input_str[11] == 'g') {
        return 0;
    }
    return -1;
}
  • a final note: I'd allow the compiler to inline this function, as it has become so simple that the call/return overhead becomes significant.