648eee52cc
Add a little strncpy optimization which can easily cut boot time by 20%. When the kernel is booting with initramfs, it builds up the filesystem from a cpio archive by calling strncpy_from_user() via fs/namei.c's do_getname() on every file in the archive (which can be lots) with a length of PATH_MAX (1024). This causes the dest of the strncpy to be padded with many NUL bytes. This optimization mostly causes these NUL bytes to be padded with a call to memset() which is already optimized for filling memory quickly, but the hardware loop helps a little bit as well. Boot time measured with 'loglevel=0' so UART speed doesn't get in the way. Signed-off-by: Robin Getz <robin.getz@analog.com> Signed-off-by: Mike Frysinger <vapier@gentoo.org>
87 lines
1.8 KiB
ArmAsm
87 lines
1.8 KiB
ArmAsm
/*
 * Copyright 2004-2009 Analog Devices Inc.
 *
 * Licensed under the ADI BSD license or the GPL-2 (or later)
 */

#include <linux/linkage.h>

.align 2

/*
 * Section placement: with CONFIG_MEMSET_L1 the routine is emitted into
 * .l1.text, otherwise into the regular .text section.
 */
#ifdef CONFIG_MEMSET_L1
.section .l1.text
#else
.text
#endif

/*
 * C library function memset
 *
 * In:
 *   R0 = destination address
 *   R1 = filler byte (only the low 8 bits are used)
 *   R2 = count, in bytes
 * Out:
 *   R0 = destination address, unchanged, so it doubles as the result
 *
 * Registers written: R1, R2, R3, P0, P1, P2, CC and hardware loop 0
 * (set up through LSETUP ... LC0).  R0 is used as scratch on the
 * unaligned path and restored before any byte is stored.
 * strncpy relies on this function not using I0 and I1.
 *
 * Favours word-aligned data: counts of 8 or more are filled with 32-bit
 * stores once the destination is 4-byte aligned; shorter counts and any
 * tail are filled one byte at a time.
 */

ENTRY(_memset)
	P0 = R0;			/* P0 = write pointer */
	P2 = R2;			/* P2 = byte count */
	R3 = R0 + R2;			/* R3 = end address (one past last byte) */
	CC = R2 <= 7(IU);		/* unsigned compare: short fill? */
	IF CC JUMP .Ltoo_small;		/* yes: plain byte loop */
	R1 = R1.B (Z);			/* R1 = fill byte, zero-extended */
	R2 = 3;
	R2 = R0 & R2;			/* R2 = low two address bits */
	CC = R2 == 0;			/* CC = destination already 4-byte aligned */
	IF !CC JUMP .Lforce_align;	/* store 1-3 leading bytes first */

	/*
	 * Here P0 is 4-byte aligned and P2 holds the bytes still to write.
	 * P2 is at least 5: the count was >= 8 and at most 3 bytes were
	 * used for alignment, so the word count in P1 is never zero.
	 */
.Laligned:
	P1 = P2 >> 2;			/* P1 = number of whole 32-bit words */
	R2 = R1 << 8;			/* R2 = 0x0000bb00 (bb = fill byte) */
	R2.L = R2.L + R1.L(NS);		/* R2.L = 0xbbbb */
	R2.H = R2.L + R1.H(NS);		/* R1.H is zero, so R2 = 0xbbbbbbbb */
	P2 = R3;			/* P2 = end address */

	/* Single-instruction hardware loop: one word store per iteration. */
	LSETUP (.Lquad_loop , .Lquad_loop) LC0=P1;
.Lquad_loop:
	[P0++] = R2;

	CC = P0 == P2;			/* reached the end exactly? */
	IF !CC JUMP .Lbytes_left;	/* no: 1-3 tail bytes remain */
	RTS;

.Lbytes_left:
	R2 = R3;			/* end address */
	R3 = P0;			/* current position */
	R2 = R2 - R3;			/* bytes left */
	P2 = R2;			/* loop count for the byte loop below */

	/*
	 * Byte-wise fill of P2 bytes starting at P0.  Entered directly for
	 * counts of 7 or less, or by fall-through for the tail of a word
	 * fill.  A zero count skips the loop so that nothing is written.
	 */
.Ltoo_small:
	CC = P2 == 0;			/* zero count? */
	IF CC JUMP .Lfinished;		/* unusual, but must store nothing */

.Lbytes:
	LSETUP (.Lbyte_loop , .Lbyte_loop) LC0=P2;
.Lbyte_loop:
	B[P0++] = R1;			/* store the low byte of R1 */

.Lfinished:
	RTS;

	/*
	 * Destination is not 4-byte aligned; R2 = address & 3 (1, 2 or 3).
	 * Store 4 - R2 leading bytes, take them off the count, then rejoin
	 * the word fill.  R0 is borrowed as scratch and reloaded from P0
	 * before P0 advances, so the return value is preserved.
	 */
.Lforce_align:
	CC = BITTST (R0, 0);		/* CC = address is odd */
	R0 = 4;
	R0 = R0 - R2;			/* R0 = bytes needed to reach alignment */
	P1 = R0;
	R0 = P0;			/* restore return value (destination) */
	IF !CC JUMP .Lskip1;		/* even address: no single byte needed */
	B[P0++] = R1;			/* odd address: store one byte */
.Lskip1:
	CC = R2 <= 2;			/* misaligned by 1 or 2: two more bytes */
	P2 -= P1;			/* reduce count by the alignment bytes */
	IF !CC JUMP .Laligned;		/* misaligned by 3: already aligned now */
	B[P0++] = R1;
	B[P0++] = R1;
	JUMP .Laligned;

ENDPROC(_memset)