Hacker News new | past | comments | ask | show | jobs | submit login

> or use an array

That's absolutely horrible: you've now introduced the possibility of accidental out-of-bounds access where there previously was none, not to mention that you just forced the compiler to put those variables on the stack instead of using registers.

Contrived example:

  int array(int lol)
  {
        unsigned counter[3] = {1,5,7};

        for (int i = 0; i < lol; i++)
                counter[i % 3]++;

        return counter[0] + counter[1] + counter[2];
  }

  int vars(int lol)
  {
        unsigned counter1 = 1;
        unsigned counter2 = 5;
        unsigned counter3 = 7;

        for (int i = 0; i < lol; i++)
                switch(i % 3) {
                case 0: counter1++;
                case 1: counter2++;
                case 2: counter3++;
                }

        return counter1 + counter2 + counter3;
  }
The output is quite different (gcc -c -O3 -std=gnu99):

  Disassembly of section .text:

  0000000000000000 <array>:
   0:   85 ff                   test   %edi,%edi
   2:   c7 44 24 e8 01 00 00    movl   $0x1,-0x18(%rsp)
   9:   00 
   a:   c7 44 24 ec 05 00 00    movl   $0x5,-0x14(%rsp)
  11:   00 
  12:   c7 44 24 f0 07 00 00    movl   $0x7,-0x10(%rsp)
  19:   00 
  1a:   7e 5f                   jle    7b <array+0x7b>
  1c:   41 b8 01 00 00 00       mov    $0x1,%r8d
  22:   31 c9                   xor    %ecx,%ecx
  24:   be 56 55 55 55          mov    $0x55555556,%esi
  29:   eb 1f                   jmp    4a <array+0x4a>
  2b:   0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
  30:   89 c8                   mov    %ecx,%eax
  32:   f7 ee                   imul   %esi
  34:   89 c8                   mov    %ecx,%eax
  36:   c1 f8 1f                sar    $0x1f,%eax
  39:   29 c2                   sub    %eax,%edx
  3b:   8d 04 52                lea    (%rdx,%rdx,2),%eax
  3e:   89 ca                   mov    %ecx,%edx
  40:   29 c2                   sub    %eax,%edx
  42:   48 63 c2                movslq %edx,%rax
  45:   44 8b 44 84 e8          mov    -0x18(%rsp,%rax,4),%r8d
  4a:   89 c8                   mov    %ecx,%eax
  4c:   f7 ee                   imul   %esi
  4e:   89 c8                   mov    %ecx,%eax
  50:   c1 f8 1f                sar    $0x1f,%eax
  53:   29 c2                   sub    %eax,%edx
  55:   8d 04 52                lea    (%rdx,%rdx,2),%eax
  58:   89 ca                   mov    %ecx,%edx
  5a:   83 c1 01                add    $0x1,%ecx
  5d:   29 c2                   sub    %eax,%edx
  5f:   39 f9                   cmp    %edi,%ecx
  61:   48 63 c2                movslq %edx,%rax
  64:   41 8d 50 01             lea    0x1(%r8),%edx
  68:   89 54 84 e8             mov    %edx,-0x18(%rsp,%rax,4)
  6c:   75 c2                   jne    30 <array+0x30>
  6e:   8b 44 24 ec             mov    -0x14(%rsp),%eax
  72:   03 44 24 e8             add    -0x18(%rsp),%eax
  76:   03 44 24 f0             add    -0x10(%rsp),%eax
  7a:   c3                      retq   
  7b:   b8 0d 00 00 00          mov    $0xd,%eax
  80:   c3                      retq   
  81:   66 66 66 66 66 66 2e    data32 data32 data32 data32 data32 nopw %cs:0x0(%rax,%rax,1)
  88:   0f 1f 84 00 00 00 00 
  8f:   00 

  0000000000000090 <vars>:
  90:   85 ff                   test   %edi,%edi
  92:   7e 52                   jle    e6 <vars+0x56>
  94:   31 c9                   xor    %ecx,%ecx
  96:   41 b8 07 00 00 00       mov    $0x7,%r8d
  9c:   41 b9 05 00 00 00       mov    $0x5,%r9d
  a2:   41 bb 01 00 00 00       mov    $0x1,%r11d
  a8:   41 ba 56 55 55 55       mov    $0x55555556,%r10d
  ae:   89 c8                   mov    %ecx,%eax
  b0:   89 ce                   mov    %ecx,%esi
  b2:   41 f7 ea                imul   %r10d
  b5:   c1 fe 1f                sar    $0x1f,%esi
  b8:   89 c8                   mov    %ecx,%eax
  ba:   29 f2                   sub    %esi,%edx
  bc:   8d 14 52                lea    (%rdx,%rdx,2),%edx
  bf:   29 d0                   sub    %edx,%eax
  c1:   83 f8 01                cmp    $0x1,%eax
  c4:   74 09                   je     cf <vars+0x3f>
  c6:   83 f8 02                cmp    $0x2,%eax
  c9:   74 08                   je     d3 <vars+0x43>
  cb:   41 83 c3 01             add    $0x1,%r11d
  cf:   41 83 c1 01             add    $0x1,%r9d
  d3:   83 c1 01                add    $0x1,%ecx
  d6:   41 83 c0 01             add    $0x1,%r8d
  da:   39 f9                   cmp    %edi,%ecx
  dc:   75 d0                   jne    ae <vars+0x1e>
  de:   45 01 d9                add    %r11d,%r9d
  e1:   43 8d 04 01             lea    (%r9,%r8,1),%eax
  e5:   c3                      retq   
  e6:   b8 0d 00 00 00          mov    $0xd,%eax
  eb:   c3                      retq
Arrays mean memory - don't use them unless you actually want the compiler to use memory.



Your example has nothing to do with the actual code in the kernel. In the original code, the counters are passed by reference to a function. That means they need to be in memory anyway, right? The compiler can't pass the address of a register.


> That means they need to be in memory anyway, right?

Actually no, GCC can optimize out pointer indirection like that if it ends up inlining the function (which it could in the kernel example, since it's static, relatively short, and has only three callers).

Another contrived example:

  static int inlineme(int *lol, int lolol)
  {
        *lol += lolol;
        return *lol * 5;
  }

  int test(int x)
  {
        int tmp = 5;
        tmp += inlineme(&tmp, x);
        return tmp;
  }
... all of which compiles to (with -O2):

  0000000000000000 <test>:
   0:   83 c7 05                add    $0x5,%edi
   3:   8d 04 bf                lea    (%rdi,%rdi,4),%eax
   6:   01 f8                   add    %edi,%eax
   8:   c3                      retq
Note that "tmp" doesn't have to be a constant for that to happen.

As far as the actual kernel function, GCC ends up emitting: (The only uncompressed kernel binary I had laying around was for my beaglebone black, sorry)

  c0136fa0 <verify_reserved_gdb>:
  c0136fa0:       e92d4ff0        push    {r4, r5, r6, r7, r8, r9, sl, fp, lr}
  c0136fa4:       e24dd02c        sub     sp, sp, #44     ; 0x2c
  <snip>
  c0136fbc:       e3a03001        mov     r3, #1
  c0136fc0:       e58d301c        str     r3, [sp, #28]
  c0136fc4:       e3a03005        mov     r3, #5
  c0136fc8:       e58d3020        str     r3, [sp, #32]
  c0136fcc:       e3a03007        mov     r3, #7
  c0136fd0:       e58d3024        str     r3, [sp, #36]   ; 0x24
  c0136fd4:       e1a00007        mov     r0, r7
  c0136fd8:       e28d101c        add     r1, sp, #28
  c0136fdc:       e28d2020        add     r2, sp, #32
  c0136fe0:       e28d3024        add     r3, sp, #36     ; 0x24
  c0136fe4:       ebffffd4        bl      c0136f3c <ext4_list_backups>
... so in this case it does actually pass by reference.

That doesn't change my argument: an array is the more confusing way to do this, and in similar situations could prevent the compiler from making the optimization it did in my example.


This is filesystem code. Any miniscule performance gain would be eclipsed by the actual reads/writes. This code should really be -Os'd to save cache.


You're missing the point: it's not that using an array is slower, it's that "int n[3]" can mean something completely different than "int a,b,c" does. A lot of comments on this thread have suggested that the two are interchangeable: they aren't.

Using an array here is wrong: it needlessly complicates the code (now in addition to everything else, I have to remember how big the array is and what each (unnamed) index corresponds to), possibly makes the compiler emit worthless loads/stores, and let's me potentially blow up the world if I mess up and access out-of-bounds.




Guidelines | FAQ | Lists | API | Security | Legal | Apply to YC | Contact

Search: