5a0e9b5718
Nick Piggin discovered that lwsync barriers around locks were faster than isync on 970. That was a long time ago and I completely dropped the ball in testing his patches across other ppc64 processors. Turns out the idea helps on other chips. Using a microbenchmark that uses a lot of threads to contend on a global pthread mutex (and therefore a global futex), POWER6 improves 8% and POWER7 improves 2%. I checked POWER5 and while I couldn't measure an improvement, there was no regression. This patch uses the lwsync patching code to replace the isyncs with lwsyncs on CPUs that support the instruction. We were marking POWER3 and RS64 as lwsync capable but in reality they treat it as a full sync (i.e. slow). Remove the CPU_FTR_LWSYNC bit from these CPUs so they continue to use the faster isync method. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
52 lines
1.2 KiB
C
52 lines
1.2 KiB
C
#ifndef _ASM_POWERPC_SYNCH_H
|
|
#define _ASM_POWERPC_SYNCH_H
|
|
#ifdef __KERNEL__
|
|
|
|
#include <linux/stringify.h>
|
|
#include <asm/feature-fixups.h>
|
|
|
|
/*
 * Advertise lwsync support at build time: the marker is defined for
 * 64-bit PowerPC builds and for CONFIG_PPC_E500MC builds, and left
 * undefined everywhere else.
 */
#if defined(CONFIG_PPC_E500MC) || defined(__powerpc64__)
# define __SUBARCH_HAS_LWSYNC
#endif
|
|
|
|
#ifndef __ASSEMBLY__
|
|
/*
 * Symbols named after the start and end of the __lwsync_fixup table.
 * NOTE(review): presumably provided by the linker script to bracket the
 * __lwsync_fixup section - confirm against the linker script.
 */
extern unsigned int __start___lwsync_fixup;
extern unsigned int __stop___lwsync_fixup;

/*
 * Process the lwsync fixup entries between fixup_start and fixup_end.
 * Defined outside this header.
 * NOTE(review): value looks like a CPU feature mask used to decide
 * whether to patch - confirm against the definition.
 */
void do_lwsync_fixups(unsigned long value,
		      void *fixup_start, void *fixup_end);
|
|
|
|
/*
 * Issue the PowerPC "eieio" instruction.
 *
 * The "memory" clobber additionally tells the compiler that memory may
 * have changed, so it will not cache or reorder memory accesses across
 * this statement.
 */
static inline void eieio(void)
{
	__asm__ __volatile__("eieio"
			     : /* no outputs */
			     : /* no inputs */
			     : "memory");
}
|
|
|
|
/*
 * Issue the PowerPC "isync" instruction.
 *
 * The "memory" clobber additionally tells the compiler that memory may
 * have changed, so it will not cache or reorder memory accesses across
 * this statement.
 */
static inline void isync(void)
{
	__asm__ __volatile__("isync"
			     : /* no outputs */
			     : /* no inputs */
			     : "memory");
}
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
/*
 * LWSYNC: instruction text used where a lightweight sync is wanted.
 *
 *  - 64-bit PowerPC: the lwsync instruction itself.
 *  - CONFIG_E500:    a full sync emitted inside an lwsync fixup section
 *                    (local label 96) and recorded in __lwsync_fixup.
 *                    NOTE(review): presumably so do_lwsync_fixups() can
 *                    patch it at run time - confirm in feature-fixups.
 *  - otherwise:      a plain sync.
 */
#ifdef __powerpc64__
# define LWSYNC	lwsync
#elif defined(CONFIG_E500)
# define LWSYNC						\
	START_LWSYNC_SECTION(96);			\
	sync;						\
	MAKE_LWSYNC_SECTION_ENTRY(96, __lwsync_fixup);
#else
# define LWSYNC	sync
#endif
|
|
|
|
/*
 * Barrier text for lock acquire and release sequences.
 *
 * Without CONFIG_SMP both macros expand to nothing.
 *
 * With CONFIG_SMP:
 *  - PPC_ACQUIRE_BARRIER is an isync wrapped in an lwsync fixup section
 *    (local label 97) recorded in __lwsync_fixup, so that the lwsync
 *    patching code can replace the isync with lwsync on CPUs that
 *    support that instruction.
 *  - PPC_RELEASE_BARRIER is the LWSYNC sequence followed by a newline.
 */
#ifndef CONFIG_SMP
#define PPC_ACQUIRE_BARRIER
#define PPC_RELEASE_BARRIER
#else
#define __PPC_ACQUIRE_BARRIER				\
	START_LWSYNC_SECTION(97);			\
	isync;						\
	MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup);
#define PPC_ACQUIRE_BARRIER	"\n" stringify_in_c(__PPC_ACQUIRE_BARRIER)
#define PPC_RELEASE_BARRIER	stringify_in_c(LWSYNC) "\n"
#endif
|
|
|
|
#endif /* __KERNEL__ */
|
|
#endif /* _ASM_POWERPC_SYNCH_H */
|