Michal Nazarewicz
2e1d06e1c0
include/linux/kernel.h: rewrite min3, max3 and clamp using min and max
It appears that gcc is better at optimising a double call to min and max
than at optimising open-coded min3 and max3. This can be observed here:
$ cat min-max.c
#define min(x, y) ({ \
typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \
(void) (&_min1 == &_min2); \
_min1 < _min2 ? _min1 : _min2; })
#define min3(x, y, z) ({ \
typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \
typeof(z) _min3 = (z); \
(void) (&_min1 == &_min2); \
(void) (&_min1 == &_min3); \
_min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
(_min2 < _min3 ? _min2 : _min3); })
int fmin3(int x, int y, int z) { return min3(x, y, z); }
int fmin2(int x, int y, int z) { return min(min(x, y), z); }
$ gcc -O2 -o min-max.s -S min-max.c; cat min-max.s
.file "min-max.c"
.text
.p2align 4,,15
.globl fmin3
.type fmin3, @function
fmin3:
.LFB0:
.cfi_startproc
cmpl %esi, %edi
jl .L5
cmpl %esi, %edx
movl %esi, %eax
cmovle %edx, %eax
ret
.p2align 4,,10
.p2align 3
.L5:
cmpl %edi, %edx
movl %edi, %eax
cmovle %edx, %eax
ret
.cfi_endproc
.LFE0:
.size fmin3, .-fmin3
.p2align 4,,15
.globl fmin2
.type fmin2, @function
fmin2:
.LFB1:
.cfi_startproc
cmpl %edi, %esi
movl %edx, %eax
cmovle %esi, %edi
cmpl %edx, %edi
cmovle %edi, %eax
ret
.cfi_endproc
.LFE1:
.size fmin2, .-fmin2
.ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
.section .note.GNU-stack,"",@progbits
The fmin3 function, which uses the open-coded min3 macro, is compiled into a
total of ten instructions including a conditional branch, whereas the fmin2
function, which uses two calls to the min macro, is compiled into six
instructions with no branches.
Similarly, an open-coded clamp produces the same code as a clamp built from
the min and max macros, but the latter's definition is much shorter:
$ cat clamp.c
#define clamp(val, min, max) ({ \
typeof(val) __val = (val); \
typeof(min) __min = (min); \
typeof(max) __max = (max); \
(void) (&__val == &__min); \
(void) (&__val == &__max); \
__val = __val < __min ? __min: __val; \
__val > __max ? __max: __val; })
#define min(x, y) ({ \
typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \
(void) (&_min1 == &_min2); \
_min1 < _min2 ? _min1 : _min2; })
#define max(x, y) ({ \
typeof(x) _max1 = (x); \
typeof(y) _max2 = (y); \
(void) (&_max1 == &_max2); \
_max1 > _max2 ? _max1 : _max2; })
int fclamp(int v, int min, int max) { return clamp(v, min, max); }
int fclampmm(int v, int min, int max) { return min(max(v, min), max); }
$ gcc -O2 -o clamp.s -S clamp.c; cat clamp.s
.file "clamp.c"
.text
.p2align 4,,15
.globl fclamp
.type fclamp, @function
fclamp:
.LFB0:
.cfi_startproc
cmpl %edi, %esi
movl %edx, %eax
cmovge %esi, %edi
cmpl %edx, %edi
cmovle %edi, %eax
ret
.cfi_endproc
.LFE0:
.size fclamp, .-fclamp
.p2align 4,,15
.globl fclampmm
.type fclampmm, @function
fclampmm:
.LFB1:
.cfi_startproc
cmpl %edi, %esi
cmovge %esi, %edi
cmpl %edi, %edx
movl %edi, %eax
cmovle %edx, %eax
ret
.cfi_endproc
.LFE1:
.size fclampmm, .-fclampmm
.ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
.section .note.GNU-stack,"",@progbits
Linux mpn-glaptop 3.13.0-29-generic #53~precise1-Ubuntu SMP Wed Jun 4 22:06:25 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux
gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3
Copyright (C) 2011 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-rwx------ 1 mpn eng 51224656 Jun 17 14:15 vmlinux.before
-rwx------ 1 mpn eng 51224608 Jun 17 13:57 vmlinux.after
48 bytes reduction. The do_fault_around function was a few instructions shorter
and as far as I can tell saved 12 bytes on the stack, i.e.:
$ grep -e rsp -e pop -e push do_fault_around.*
do_fault_around.before.s:push %rbp
do_fault_around.before.s:mov %rsp,%rbp
do_fault_around.before.s:push %r13
do_fault_around.before.s:push %r12
do_fault_around.before.s:push %rbx
do_fault_around.before.s:sub $0x38,%rsp
do_fault_around.before.s:add $0x38,%rsp
do_fault_around.before.s:pop %rbx
do_fault_around.before.s:pop %r12
do_fault_around.before.s:pop %r13
do_fault_around.before.s:pop %rbp
do_fault_around.after.s:push %rbp
do_fault_around.after.s:mov %rsp,%rbp
do_fault_around.after.s:push %r12
do_fault_around.after.s:push %rbx
do_fault_around.after.s:sub $0x30,%rsp
do_fault_around.after.s:add $0x30,%rsp
do_fault_around.after.s:pop %rbx
do_fault_around.after.s:pop %r12
do_fault_around.after.s:pop %rbp
or here side-by-side:
Before After
push %rbp push %rbp
mov %rsp,%rbp mov %rsp,%rbp
push %r13
push %r12 push %r12
push %rbx push %rbx
sub $0x38,%rsp sub $0x30,%rsp
add $0x38,%rsp add $0x30,%rsp
pop %rbx pop %rbx
pop %r12 pop %r12
pop %r13
pop %rbp pop %rbp
There are also fewer branches:
$ grep ^j do_fault_around.*
do_fault_around.before.s:jae ffffffff812079b7
do_fault_around.before.s:jmp ffffffff812079c5
do_fault_around.before.s:jmp ffffffff81207a14
do_fault_around.before.s:ja ffffffff812079f9
do_fault_around.before.s:jb ffffffff81207a10
do_fault_around.before.s:jmp ffffffff81207a63
do_fault_around.before.s:jne ffffffff812079df
do_fault_around.after.s:jmp ffffffff812079fd
do_fault_around.after.s:ja ffffffff812079e2
do_fault_around.after.s:jb ffffffff812079f9
do_fault_around.after.s:jmp ffffffff81207a4c
do_fault_around.after.s:jne ffffffff812079c8
And here's with allyesconfig on a different machine:
$ uname -a; gcc --version; ls -l vmlinux.*
Linux erwin 3.14.7-mn #54 SMP Sun Jun 15 11:25:08 CEST 2014 x86_64 AMD Phenom(tm) II X3 710 Processor AuthenticAMD GNU/Linux
gcc (GCC) 4.8.3
Copyright (C) 2013 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-rwx------ 1 mpn eng 437027411 Jun 20 16:04 vmlinux.before
-rwx------ 1 mpn eng 437026881 Jun 20 15:30 vmlinux.after
530 bytes reduction.
Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Cc: David Rientjes <rientjes@google.com>
Cc: "Rustad, Mark D" <mark.d.rustad@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-09 22:26:03 -04:00
..
2014-09-24 10:25:52 +01:00
2014-09-30 13:17:14 -04:00
2014-08-06 18:01:23 -07:00
2014-09-22 11:38:59 +02:00
2014-08-08 15:57:26 -07:00
2014-09-22 16:48:47 +09:00
2014-10-07 21:16:26 -04:00
2014-08-09 09:15:07 -07:00
2014-08-25 21:47:22 +01:00
2014-08-07 23:37:07 -07:00
2014-10-09 06:42:04 -04:00
2014-10-07 21:26:52 -04:00
2014-10-06 01:04:15 -04:00
2014-10-03 15:42:32 -07:00
2014-08-11 11:42:39 +02:00
2014-08-19 11:55:38 -07:00
2014-09-15 22:20:21 +02:00
2014-07-22 12:46:11 +05:30
2014-09-04 19:21:45 +02:00
2014-10-08 17:13:04 -04:00
2014-09-30 13:50:31 +01:00
2014-09-24 11:53:39 -07:00
2014-08-30 19:28:04 +01:00
2014-07-18 13:45:24 -04:00
2014-10-08 12:51:44 -04:00
2014-09-29 11:54:02 -04:00
2014-08-07 14:40:09 -04:00
2014-08-09 09:15:07 -07:00
2014-07-30 10:42:55 -07:00
2014-07-30 15:39:46 -04:00
2014-09-08 10:54:03 +02:00
2014-10-09 22:26:01 -04:00
2014-08-06 18:01:27 -07:00
2014-08-15 12:38:41 -06:00
2014-09-26 15:05:15 -04:00
2014-10-01 22:12:48 -04:00
2014-07-24 21:53:47 +10:00
2014-09-24 14:23:34 +08:00
2014-09-09 10:34:56 +02:00
2014-07-23 15:01:52 -07:00
2014-08-06 18:01:16 -07:00
2014-09-29 14:36:26 -04:00
2014-10-09 22:25:54 -04:00
2014-07-21 13:43:19 +02:00
2014-09-24 22:16:06 -04:00
2014-08-01 22:35:55 +08:00
2014-09-14 17:28:32 -07:00
2014-09-23 22:53:15 -07:00
2014-09-23 23:32:50 -07:00
2014-08-06 18:01:16 -07:00
2014-10-07 20:39:25 -04:00
2014-07-23 16:04:47 +02:00
2014-10-03 14:55:48 -07:00
2014-09-29 00:04:55 -04:00
2014-08-08 15:57:33 -07:00
2014-09-05 17:47:02 -07:00
2014-09-05 12:12:20 -07:00
2014-09-23 11:10:20 -07:00
2014-09-26 15:05:15 -04:00
2014-10-08 16:01:41 -04:00
2014-08-07 14:40:08 -04:00
2014-08-11 11:44:11 -07:00
2014-08-06 18:01:12 -07:00
2014-08-09 17:33:44 -07:00
2014-08-22 13:18:48 -04:00
2014-10-09 22:25:52 -04:00
2014-10-09 22:25:55 -04:00
2014-08-06 18:01:24 -07:00
2014-09-13 11:24:03 -07:00
2014-10-06 23:34:40 +02:00
2014-08-06 18:01:22 -07:00
2014-08-04 10:07:36 +02:00
2014-07-23 10:17:54 -07:00
2014-10-09 22:25:57 -04:00
2014-08-06 18:01:19 -07:00
2014-09-25 16:07:15 +02:00
2014-09-11 12:21:18 +02:00
2014-09-29 15:37:01 -04:00
2014-08-11 12:16:51 -07:00
2014-09-04 22:26:14 -07:00
2014-07-25 11:47:46 -07:00
2014-07-23 12:02:30 +02:00
2014-08-08 15:57:32 -07:00
2014-07-23 16:04:47 +02:00
2014-09-23 23:13:13 -07:00
2014-09-13 18:38:15 +02:00
2014-09-03 12:57:27 +00:00
2014-08-02 15:03:58 -07:00
2014-08-28 22:22:29 -04:00
2014-09-12 13:59:03 -07:00
2014-10-09 22:26:03 -04:00
2014-08-08 15:57:33 -07:00
2014-07-22 21:46:08 +01:00
2014-07-22 21:55:45 +01:00
2014-09-09 10:34:56 +02:00
2014-08-06 18:01:24 -07:00
2014-07-28 14:16:30 -04:00
2014-07-23 10:17:54 -07:00
2014-09-27 11:03:33 +02:00
2014-08-29 16:35:53 +02:00
2014-09-02 10:02:13 -07:00
2014-07-23 10:30:34 -04:00
2014-08-06 18:01:24 -07:00
2014-09-23 21:06:21 -07:00
2014-09-29 11:56:02 -04:00
2014-08-06 18:01:15 -07:00
2014-10-09 22:25:59 -04:00
2014-10-09 22:25:52 -04:00
2014-10-09 22:25:56 -04:00
2014-10-09 22:26:01 -04:00
2014-08-08 15:57:23 -07:00
2014-10-09 22:26:01 -04:00
2014-10-09 22:25:58 -04:00
2014-09-24 14:07:58 +02:00
2014-10-09 22:25:57 -04:00
2014-07-27 20:52:44 +09:30
2014-07-27 20:52:42 +09:30
2014-09-11 09:59:25 +09:30
2014-08-11 11:44:11 -07:00
2014-07-24 01:15:04 -07:00
2014-10-08 16:08:04 -04:00
2014-09-26 18:42:31 +02:00
2014-08-25 10:45:28 +02:00
2014-09-29 14:35:20 -04:00
2014-09-12 13:50:26 -04:00
2014-08-22 18:04:43 -04:00
2014-09-24 23:22:47 -04:00
2014-07-22 15:05:06 -07:00
2014-08-06 18:01:21 -07:00
2014-07-29 18:08:50 -07:00
2014-07-23 12:02:30 +02:00
2014-07-26 00:21:41 +05:30
2014-07-29 21:26:45 -06:00
2014-08-01 12:22:21 -06:00
2014-08-11 14:06:23 +01:00
2014-09-18 09:53:25 -07:00
2014-07-29 12:38:07 +02:00
2014-08-06 18:01:21 -07:00
2014-08-08 15:57:18 -07:00
2014-08-06 18:01:17 -07:00
2014-10-09 22:26:01 -04:00
2014-07-23 01:00:45 +02:00
2014-09-10 13:45:01 -06:00
2014-08-04 10:09:27 -07:00
2014-10-07 21:16:26 -04:00
2014-10-07 13:06:45 -04:00
2014-08-23 11:39:09 -07:00
2014-08-04 10:07:11 -07:00
2014-09-03 01:25:32 +02:00
2014-07-23 01:00:36 +02:00
2014-07-18 23:40:23 +02:00
2014-08-13 15:13:44 +10:00
2014-09-17 16:33:11 -04:00
2014-08-24 18:36:01 -07:00
2014-08-06 18:01:24 -07:00
2014-07-25 19:01:53 +01:00
2014-09-03 20:56:32 -07:00
2014-08-08 15:57:24 -07:00
2014-10-09 22:25:57 -04:00
2014-09-13 12:30:26 -04:00
2014-08-08 15:57:26 -07:00
2014-10-09 22:25:58 -04:00
2014-07-18 12:13:40 -07:00
2014-07-30 01:31:46 +10:00
2014-08-05 17:46:42 -07:00
2014-08-28 11:59:38 +05:30
2014-09-29 18:20:38 -07:00
2014-09-26 18:01:56 +02:00
2014-07-23 01:10:45 +02:00
2014-08-08 15:57:26 -07:00
2014-08-08 15:57:31 -07:00
2014-08-06 13:03:44 +02:00
2014-10-08 21:40:54 -04:00
2014-10-09 22:25:51 -04:00
2014-10-09 22:25:50 -04:00
2014-07-31 14:13:29 -07:00
2014-10-09 22:25:59 -04:00
2014-09-26 15:05:14 -04:00
2014-08-08 15:57:24 -07:00
2014-10-08 21:40:54 -04:00
2014-07-22 10:12:05 +08:00
2014-09-23 23:14:28 -07:00
2014-09-13 18:34:44 +02:00
2014-07-23 10:17:53 -07:00
2014-07-23 10:17:54 -07:00
2014-07-30 09:26:25 +02:00
2014-07-23 15:01:55 -07:00
2014-07-18 11:49:57 +02:00
2014-10-09 22:25:51 -04:00
2014-07-29 23:10:56 +02:00
2014-08-06 13:03:43 +02:00
2014-08-07 20:39:36 -04:00
2014-09-23 21:19:35 -07:00
2014-09-25 12:17:25 +02:00
2014-09-01 21:36:28 -07:00
2014-09-26 21:16:51 -04:00
2014-09-23 21:40:48 -07:00
2014-09-25 17:05:12 +02:00
2014-08-08 15:57:22 -07:00
2014-08-08 10:39:16 -06:00
2014-09-18 19:21:20 -04:00
2014-09-16 15:02:55 -06:00
2014-10-09 22:26:01 -04:00
2014-08-06 18:01:19 -07:00
2014-09-25 08:23:57 -04:00
2014-09-13 05:13:08 +09:00
2014-08-06 18:01:23 -07:00
2014-08-08 15:57:26 -07:00
2014-08-06 18:01:23 -07:00
2014-10-09 22:26:02 -04:00