linux/arch/ia64/kernel/patch.c

/*
 * Instruction-patching support.
 *
 * Copyright (C) 2003 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 */
#include <linux/init.h>
#include <linux/string.h>

#include <asm/patch.h>
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/system.h>
#include <asm/unistd.h>

/*
 * This was adapted from code written by Tony Luck:
 *
 * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle
 * like this:
 *
 * 6  6         5         4         3         2         1
 * 3210987654321098765432109876543210987654321098765432109876543210
 * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG
 *
 * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB
 */
static u64
get_imm64 (u64 insn_addr)
{
	/*
	 * Reassemble the 64-bit immediate stored in the bundle that contains
	 * insn_addr.  The low four address bits hold the slot number; clearing
	 * them yields the address of the two 64-bit words of the bundle.
	 */
	const u64 *bundle = (const u64 *) (insn_addr & -16);
	const u64 w0 = bundle[0];
	const u64 w1 = bundle[1];
	u64 imm;

	/* Collect the fields A..G of the layout diagram above, one at a time. */
	imm  = (w1 & 0x0800000000000000UL) << 4;	/* A: word 1 bit  59     -> bit  63     */
	imm |= (w1 & 0x00000000007fffffUL) << 40;	/* B: word 1 bits 22..0  -> bits 62..40 */
	imm |= (w0 & 0xffffc00000000000UL) >> 24;	/* C: word 0 bits 63..46 -> bits 39..22 */
	imm |= (w1 & 0x0000100000000000UL) >> 23;	/* D: word 1 bit  44     -> bit  21     */
	imm |= (w1 & 0x0003e00000000000UL) >> 29;	/* E: word 1 bits 49..45 -> bits 20..16 */
	imm |= (w1 & 0x07fc000000000000UL) >> 43;	/* F: word 1 bits 58..50 -> bits 15..7  */
	imm |= (w1 & 0x000007f000000000UL) >> 36;	/* G: word 1 bits 42..36 -> bits 6..0   */

	return imm;
}

/*
 * Patch instruction with "val" where "mask" has 1 bits.
 *
 * insn_addr: address of the bundle, with the slot number (0, 1 or 2) in the
 *            low four bits.
 * mask:      bits of the addressed instruction slot that are to be replaced,
 *            numbered from bit 0 of that slot.
 * val:       replacement bits; only those selected by "mask" are used.
 *
 * All bundle bits not selected by "mask" keep their previous value.  A bundle
 * is 5 bits of template followed by 3 x 41-bit instructions, so "mask" is
 * expected to fit in 41 bits; for slots 0 and 1 any higher mask bits would
 * land in the following slot.
 *
 * A slot number greater than 2 does not name an instruction.  Such an address
 * is ignored: computing its shift count would require shifting a 64-bit value
 * by 64 or more, which is undefined in C.
 */
void
ia64_patch (u64 insn_addr, u64 mask, u64 val)
{
	u64 *b = (u64 *) (insn_addr & -16);	/* mask out slot number */
	unsigned long slot = insn_addr % 16;
	unsigned long shift;
	u64 m1, v1;

	if (slot > 2)
		return;

	shift = 5 + 41 * slot;	/* 5 bits of template, then 3 x 41-bit instructions */
	if (shift >= 64) {
		/* slot 2 lies entirely within the upper word */
		shift -= 64;
		m1 = mask << shift;
		v1 = val << shift;
	} else {
		/* slots 0 and 1 start in the lower word; slot 1 continues into the upper one */
		u64 m0 = mask << shift;
		u64 v0 = val << shift;

		m1 = mask >> (64 - shift);
		v1 = val >> (64 - shift);
		b[0] = (b[0] & ~m0) | (v0 & m0);
	}
	b[1] = (b[1] & ~m1) | (v1 & m1);
}

/*
 * Store the 64-bit immediate "val" into the long (2-slot) instruction of the
 * bundle containing insn_addr, using the field layout read back by get_imm64().
 */
void
ia64_patch_imm64 (u64 insn_addr, u64 val)
{
	u64 bundle, slot2_bits;

	/*
	 * The assembler may generate an offset pointing to either slot 1 or
	 * slot 2 for a long (2-slot) instruction, which occupies slots 1 and 2.
	 * Drop the slot number and address both slots explicitly.
	 */
	bundle = insn_addr & -16UL;

	/* The parts of the immediate that are scattered across slot 2. */
	slot2_bits  = (val & 0x8000000000000000UL) >> 27;	/* bit 63 -> 36 */
	slot2_bits |= (val & 0x0000000000200000UL) <<  0;	/* bit 21 -> 21 */
	slot2_bits |= (val & 0x00000000001f0000UL) <<  6;	/* bit 16 -> 22 */
	slot2_bits |= (val & 0x000000000000ff80UL) << 20;	/* bit  7 -> 27 */
	slot2_bits |= (val & 0x000000000000007fUL) << 13;	/* bit  0 -> 13 */
	ia64_patch(bundle + 2, 0x01fffefe000UL, slot2_bits);

	/* Bits 22..62 of the immediate fill all 41 bits of slot 1. */
	ia64_patch(bundle + 1, 0x1ffffffffffUL, val >> 22);
}

/*
 * Store the 60-bit immediate "val" into the long (2-slot) instruction of the
 * bundle containing insn_addr.
 */
void
ia64_patch_imm60 (u64 insn_addr, u64 val)
{
	u64 bundle, slot2_bits;

	/*
	 * The assembler may generate an offset pointing to either slot 1 or
	 * slot 2 for a long (2-slot) instruction, which occupies slots 1 and 2.
	 * Drop the slot number and address both slots explicitly.
	 */
	bundle = insn_addr & -16UL;

	/* Bit 59 and the low 20 bits of the immediate are held in slot 2. */
	slot2_bits  = (val & 0x0800000000000000UL) >> 23;	/* bit 59 -> 36 */
	slot2_bits |= (val & 0x00000000000fffffUL) << 13;	/* bit  0 -> 13 */
	ia64_patch(bundle + 2, 0x011ffffe000UL, slot2_bits);

	/* Bits 20..58 of the immediate go to bits 2..40 of slot 1. */
	ia64_patch(bundle + 1, 0x1fffffffffcUL, val >> 18);
}

/*
 * We sometimes need to load the physical address of a kernel
 * object.  Often we can convert the virtual address to physical
 * at execution time, but sometimes (either for performance reasons
 * or during error recovery) we cannot do this.  Patch the marked
 * bundles to load the physical address.
 *
 * The range [start, end) is an array of s32 entries.  Each entry holds the
 * offset from the entry's own address to the instruction to be patched.
 */
void __init
ia64_patch_vtop (unsigned long start, unsigned long end)
{
	s32 *entry;

	for (entry = (s32 *) start; entry < (s32 *) end; ++entry) {
		u64 insn = (u64) entry + *entry;
		u64 vaddr = get_imm64(insn);

		/* replace virtual address with corresponding physical address: */
		ia64_patch_imm64(insn, ia64_tpa(vaddr));
		ia64_fc((void *) insn);
	}
	/* issued once, after every entry has been patched */
	ia64_sync_i();
	ia64_srlz_i();
}

/*
 * Disable the McKinley Errata 9 workaround code on CPUs that do not need it.
 *
 * The workaround is kept when the local CPU reports family 0x1f, model 0.
 * Otherwise the two bundles at every location listed in [start, end) are
 * overwritten with no-ops followed by a return.  Each list entry is an s32
 * offset relative to the entry's own address.  The decision is logged on the
 * first call only.
 */
void __init
ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
{
	static int first_time = 1;
	const int need_workaround = (local_cpu_data->family == 0x1f
				     && local_cpu_data->model == 0);
	s32 *entry;

	if (first_time) {
		first_time = 0;
		if (!need_workaround)
			printk(KERN_INFO "McKinley Errata 9 workaround not needed; "
			       "disabling it\n");
		else
			printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n");
	}
	if (need_workaround)
		return;

	for (entry = (s32 *) start; entry < (s32 *) end; ++entry) {
		u64 *wp = (u64 *) ia64_imva((char *) entry + *entry);

		/* first bundle: nop.m 0; nop.i 0; nop.i 0 */
		wp[0] = 0x0000000100000000UL;
		wp[1] = 0x0004000000000200UL;
		/* second bundle: nop.m 0; nop.i 0; br.ret.sptk.many b6 */
		wp[2] = 0x0000000100000011UL;
		wp[3] = 0x0084006880000200UL;
		/* one ia64_fc() per rewritten bundle */
		ia64_fc(wp);
		ia64_fc(wp + 2);
	}
	ia64_sync_i();
	ia64_srlz_i();
}

/*
 * Patch the address of fsyscall_table into the 64-bit immediate of every
 * instruction listed in [start, end).  Each list entry is an s32 offset
 * relative to the entry's own address.
 */
static void __init
patch_fsyscall_table (unsigned long start, unsigned long end)
{
	extern unsigned long fsyscall_table[NR_syscalls];
	s32 *entry;

	for (entry = (s32 *) start; entry < (s32 *) end; ++entry) {
		/* the instruction is both patched and flushed through its ia64_imva() address */
		u64 insn = (u64) ia64_imva((char *) entry + *entry);

		ia64_patch_imm64(insn, (u64) fsyscall_table);
		ia64_fc((void *) insn);
	}
	ia64_sync_i();
	ia64_srlz_i();
}

/*
 * Patch the 60-bit immediate of every instruction listed in [start, end) so
 * that it holds the distance from the instruction's bundle to
 * fsys_bubble_down, expressed in units of 16 bytes.  Each list entry is an
 * s32 offset relative to the entry's own address.
 */
static void __init
patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
{
	extern char fsys_bubble_down[];
	s32 *entry;

	for (entry = (s32 *) start; entry < (s32 *) end; ++entry) {
		u64 insn = (u64) entry + *entry;
		/* distance is measured from the bundle start, hence the slot bits are masked off */
		u64 disp = (u64) (fsys_bubble_down - (insn & -16)) / 16;

		/* the write goes through the ia64_imva() address; ia64_fc() gets the unconverted one */
		ia64_patch_imm60((u64) ia64_imva((void *) insn), disp);
		ia64_fc((void *) insn);
	}
	ia64_sync_i();
	ia64_srlz_i();
}

/*
 * Apply all gate patch lists.
 *
 * Each list is delimited by a pair of symbols named
 * __start_gate_<name>_patchlist and __end_gate_<name>_patchlist (declared
 * elsewhere); the matching patch routine is run over that range.  The lists
 * are processed in the order written below.
 */
void __init
ia64_patch_gate (void)
{
	/* Build the start/end address of the patch list called "name". */
#	define START(name)	((unsigned long) __start_gate_##name##_patchlist)
#	define END(name)	((unsigned long)__end_gate_##name##_patchlist)

	patch_fsyscall_table(START(fsyscall), END(fsyscall));
	patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down));
	ia64_patch_vtop(START(vtop), END(vtop));
	ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
}

/*
 * Patch "val" into the immediate of every instruction on the
 * __phys_stack_reg patch list.
 *
 * Only the low 13 bits of "val" are encoded: bits 0..6 go to instruction
 * bits 13..19 and bits 7..12 go to instruction bits 27..32.  Each list entry
 * is an s32 offset relative to the entry's own address.
 */
void ia64_patch_phys_stack_reg(unsigned long val)
{
	s32 * offp = (s32 *) __start___phys_stack_reg_patchlist;
	s32 * end = (s32 *) __end___phys_stack_reg_patchlist;
	u64 ip, mask, imm;

	/* see instruction format A4: adds r1 = imm13, r3 */
	mask = (0x3fUL << 27) | (0x7fUL << 13);
	imm = (((val >> 7) & 0x3f) << 27) | ((val & 0x7f) << 13);

	while (offp < end) {
		ip = (u64) offp + *offp;
		ia64_patch(ip, mask, imm);
		/* pass a pointer, as every other ia64_fc() call in this file does */
		ia64_fc((void *) ip);
		++offp;
	}
	ia64_sync_i();
	ia64_srlz_i();
}
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`/*`
			`* Instruction-patching support.`
			`*`
			`* Copyright (C) 2003 Hewlett-Packard Co`
			`* David Mosberger-Tang <davidm@hpl.hp.com>`
			`*/`
			`#include <linux/init.h>`
			`#include <linux/string.h>`

			`#include <asm/patch.h>`
			`#include <asm/processor.h>`
			`#include <asm/sections.h>`
			`#include <asm/system.h>`
			`#include <asm/unistd.h>`

			`/*`
			`* This was adapted from code written by Tony Luck:`
			`*`
			`* The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle`
			`* like this:`
			`*`
			`* 6 6 5 4 3 2 1`
			`* 3210987654321098765432109876543210987654321098765432109876543210`
			`* ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG`
			`*`
			`* CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`
			`* xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB`
			`*/`
			`static u64`
			`get_imm64 (u64 insn_addr)`
			`{`
			`u64 p = (u64 ) (insn_addr & -16); /* mask out slot number */`

			`return ( (p[1] & 0x0800000000000000UL) << 4) \| /A/`
			`((p[1] & 0x00000000007fffffUL) << 40) \| /B/`
			`((p[0] & 0xffffc00000000000UL) >> 24) \| /C/`
			`((p[1] & 0x0000100000000000UL) >> 23) \| /D/`
			`((p[1] & 0x0003e00000000000UL) >> 29) \| /E/`
			`((p[1] & 0x07fc000000000000UL) >> 43) \| /F/`
			`((p[1] & 0x000007f000000000UL) >> 36); /G/`
			`}`

			`/* Patch instruction with "val" where "mask" has 1 bits. */`
			`void`
			`ia64_patch (u64 insn_addr, u64 mask, u64 val)`
			`{`
			`u64 m0, m1, v0, v1, b0, b1, b = (u64 ) (insn_addr & -16);`
			`# define insn_mask ((1UL << 41) - 1)`
			`unsigned long shift;`

			`b0 = b[0]; b1 = b[1];`
			`shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */`
			`if (shift >= 64) {`
			`m1 = mask << (shift - 64);`
			`v1 = val << (shift - 64);`
			`} else {`
			`m0 = mask << shift; m1 = mask >> (64 - shift);`
			`v0 = val << shift; v1 = val >> (64 - shift);`
			`b[0] = (b0 & ~m0) \| (v0 & m0);`
			`}`
			`b[1] = (b1 & ~m1) \| (v1 & m1);`
			`}`

			`void`
			`ia64_patch_imm64 (u64 insn_addr, u64 val)`
			`{`
[IA64] Fix 2.6 kernel for the new ia64 assembler The new ia64 assembler uses slot 1 for the offset of a long (2-slot) instruction and the old assembler uses slot 2. The 2.6 kernel assumes slot 2 and won't boot when the new assembler is used: http://sources.redhat.com/bugzilla/show_bug.cgi?id=1433 This patch will work with either slot 1 or 2. Patch provided by H.J. Lu Signed-off-by: Tony Luck <tony.luck@intel.com> 2005-10-07 18:01:19 +00:00			`/* The assembler may generate offset pointing to either slot 1`
			`or slot 2 for a long (2-slot) instruction, occupying slots 1`
			`and 2. */`
			`insn_addr &= -16UL;`
			`ia64_patch(insn_addr + 2,`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`0x01fffefe000UL, ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */`
			`\| ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */`
			`\| ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */`
			`\| ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */`
			`\| ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */));`
[IA64] Fix 2.6 kernel for the new ia64 assembler The new ia64 assembler uses slot 1 for the offset of a long (2-slot) instruction and the old assembler uses slot 2. The 2.6 kernel assumes slot 2 and won't boot when the new assembler is used: http://sources.redhat.com/bugzilla/show_bug.cgi?id=1433 This patch will work with either slot 1 or 2. Patch provided by H.J. Lu Signed-off-by: Tony Luck <tony.luck@intel.com> 2005-10-07 18:01:19 +00:00			`ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22);`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`}`

			`void`
			`ia64_patch_imm60 (u64 insn_addr, u64 val)`
			`{`
[IA64] Fix 2.6 kernel for the new ia64 assembler The new ia64 assembler uses slot 1 for the offset of a long (2-slot) instruction and the old assembler uses slot 2. The 2.6 kernel assumes slot 2 and won't boot when the new assembler is used: http://sources.redhat.com/bugzilla/show_bug.cgi?id=1433 This patch will work with either slot 1 or 2. Patch provided by H.J. Lu Signed-off-by: Tony Luck <tony.luck@intel.com> 2005-10-07 18:01:19 +00:00			`/* The assembler may generate offset pointing to either slot 1`
			`or slot 2 for a long (2-slot) instruction, occupying slots 1`
			`and 2. */`
			`insn_addr &= -16UL;`
			`ia64_patch(insn_addr + 2,`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`0x011ffffe000UL, ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */`
			`\| ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */));`
[IA64] Fix 2.6 kernel for the new ia64 assembler The new ia64 assembler uses slot 1 for the offset of a long (2-slot) instruction and the old assembler uses slot 2. The 2.6 kernel assumes slot 2 and won't boot when the new assembler is used: http://sources.redhat.com/bugzilla/show_bug.cgi?id=1433 This patch will work with either slot 1 or 2. Patch provided by H.J. Lu Signed-off-by: Tony Luck <tony.luck@intel.com> 2005-10-07 18:01:19 +00:00			`ia64_patch(insn_addr + 1, 0x1fffffffffcUL, val >> 18);`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`}`

			`/*`
			`* We need sometimes to load the physical address of a kernel`
			`* object. Often we can convert the virtual address to physical`
			`* at execution time, but sometimes (either for performance reasons`
			`* or during error recovery) we cannot to this. Patch the marked`
			`* bundles to load the physical address.`
			`*/`
			`void __init`
			`ia64_patch_vtop (unsigned long start, unsigned long end)`
			`{`
			`s32 offp = (s32 ) start;`
			`u64 ip;`

			`while (offp < (s32 *) end) {`
			`ip = (u64) offp + *offp;`

			`/* replace virtual address with corresponding physical address: */`
			`ia64_patch_imm64(ip, ia64_tpa(get_imm64(ip)));`
			`ia64_fc((void *) ip);`
			`++offp;`
			`}`
			`ia64_sync_i();`
			`ia64_srlz_i();`
			`}`

[IA64] add init declaration - gate page functions Add init declaration to bunch of patch functions and gate page setup function. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com> 2006-03-12 17:08:26 +00:00			`void __init`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)`
			`{`
			`static int first_time = 1;`
			`int need_workaround;`
			`s32 offp = (s32 ) start;`
			`u64 *wp;`

			`need_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0);`

			`if (first_time) {`
			`first_time = 0;`
			`if (need_workaround)`
			`printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n");`
			`else`
			`printk(KERN_INFO "McKinley Errata 9 workaround not needed; "`
			`"disabling it\n");`
			`}`
			`if (need_workaround)`
			`return;`

			`while (offp < (s32 *) end) {`
			`wp = (u64 ) ia64_imva((char ) offp + *offp);`
			`wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */`
			`wp[1] = 0x0004000000000200UL;`
			`wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */`
			`wp[3] = 0x0084006880000200UL;`
			`ia64_fc(wp); ia64_fc(wp + 2);`
			`++offp;`
			`}`
			`ia64_sync_i();`
			`ia64_srlz_i();`
			`}`

[IA64] add init declaration - gate page functions Add init declaration to bunch of patch functions and gate page setup function. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com> 2006-03-12 17:08:26 +00:00			`static void __init`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`patch_fsyscall_table (unsigned long start, unsigned long end)`
			`{`
			`extern unsigned long fsyscall_table[NR_syscalls];`
			`s32 offp = (s32 ) start;`
			`u64 ip;`

			`while (offp < (s32 *) end) {`
			`ip = (u64) ia64_imva((char ) offp + offp);`
			`ia64_patch_imm64(ip, (u64) fsyscall_table);`
			`ia64_fc((void *) ip);`
			`++offp;`
			`}`
			`ia64_sync_i();`
			`ia64_srlz_i();`
			`}`

[IA64] add init declaration - gate page functions Add init declaration to bunch of patch functions and gate page setup function. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com> 2006-03-12 17:08:26 +00:00			`static void __init`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)`
			`{`
			`extern char fsys_bubble_down[];`
			`s32 offp = (s32 ) start;`
			`u64 ip;`

			`while (offp < (s32 *) end) {`
			`ip = (u64) offp + *offp;`
			`ia64_patch_imm60((u64) ia64_imva((void *) ip),`
			`(u64) (fsys_bubble_down - (ip & -16)) / 16);`
			`ia64_fc((void *) ip);`
			`++offp;`
			`}`
			`ia64_sync_i();`
			`ia64_srlz_i();`
			`}`

[IA64] add init declaration - gate page functions Add init declaration to bunch of patch functions and gate page setup function. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com> 2006-03-12 17:08:26 +00:00			`void __init`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 22:20:36 +00:00			`ia64_patch_gate (void)`
			`{`
			`# define START(name) ((unsigned long) __start_gate_##name##_patchlist)`
			`# define END(name) ((unsigned long)__end_gate_##name##_patchlist)`

			`patch_fsyscall_table(START(fsyscall), END(fsyscall));`
			`patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down));`
			`ia64_patch_vtop(START(vtop), END(vtop));`
			`ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));`
			`}`
[IA64] remove per-cpu ia64_phys_stacked_size_p8 It's not efficient to use a per-cpu variable just to store how many physical stack register a cpu has. Ever since the incarnation of ia64 up till upcoming Montecito processor, that variable has "glued" to 96. Having a variable in memory means that the kernel is burning an extra cacheline access on every syscall and kernel exit path. Such "static" value is better served with the instruction patching utility exists today. Convert ia64_phys_stacked_size_p8 into dynamic insn patching. This also has a pleasant side effect of eliminating access to per-cpu area while psr.ic=0 in the kernel exit path. (fixable for per-cpu DTC work, but why bother?) There are some concerns with the default value that the instruc- tion encoded in the kernel image. It shouldn't be concerned. The reasons are: (1) cpu_init() is called at CPU initialization. In there, we find out physical stack register size from PAL and patch two instructions in kernel exit code. The code in question can not be executed before the patching is done. (2) current implementation stores zero in ia64_phys_stacked_size_p8, and that's what the current kernel exit path loads the value with. With the new code, it is equivalent that we store reg size 96 in ia64_phys_stacked_size_p8, thus creating a better safety net. Given (1) above can never fail, having (2) is just a bonus. All in all, this patch allow one less memory reference in the kernel exit path, thus reducing syscall and interrupt return latency; and avoid polluting potential useful data in the CPU cache. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com> 2006-10-13 17:05:45 +00:00
			`void ia64_patch_phys_stack_reg(unsigned long val)`
			`{`
			`s32 * offp = (s32 *) __start___phys_stack_reg_patchlist;`
			`s32 * end = (s32 *) __end___phys_stack_reg_patchlist;`
			`u64 ip, mask, imm;`

			`/* see instruction format A4: adds r1 = imm13, r3 */`
			`mask = (0x3fUL << 27) \| (0x7f << 13);`
			`imm = (((val >> 7) & 0x3f) << 27) \| (val & 0x7f) << 13;`

			`while (offp < end) {`
			`ip = (u64) offp + *offp;`
			`ia64_patch(ip, mask, imm);`
			`ia64_fc(ip);`
			`++offp;`
			`}`
			`ia64_sync_i();`
			`ia64_srlz_i();`
			`}`