1431559200
Distros generally (I looked at Debian, RHEL5 and SLES11) seem to enable CONFIG_HIGHPTE for any x86 configuration which has highmem enabled. This means that the overhead applies even to machines which have a fairly modest amount of high memory and which therefore do not really benefit from allocating PTEs in high memory but still pay the price of the additional mapping operations. Running kernbench on a 4G box I found that with CONFIG_HIGHPTE=y but no actual highptes being allocated there was a reduction in system time used from 59.737s to 55.9s. With CONFIG_HIGHPTE=y and highmem PTEs being allocated: Average Optimal load -j 4 Run (std deviation): Elapsed Time 175.396 (0.238914) User Time 515.983 (5.85019) System Time 59.737 (1.26727) Percent CPU 263.8 (71.6796) Context Switches 39989.7 (4672.64) Sleeps 42617.7 (246.307) With CONFIG_HIGHPTE=y but with no highmem PTEs being allocated: Average Optimal load -j 4 Run (std deviation): Elapsed Time 174.278 (0.831968) User Time 515.659 (6.07012) System Time 55.9 (1.07799) Percent CPU 263.8 (71.266) Context Switches 39929.6 (4485.13) Sleeps 42583.7 (373.039) This patch allows the user to control the allocation of PTEs in highmem from the command line ("userpte=nohigh") but retains the status-quo as the default. It is possible that some simple heuristic could be developed which allows auto-tuning of this option however I don't have a sufficiently large machine available to me to perform any particularly meaningful experiments. We could probably handwave up an argument for a threshold at 16G of total RAM. Assuming 768M of lowmem we have 196608 potential lowmem PTE pages. Each page can map 2M of RAM in a PAE-enabled configuration, meaning a maximum of 384G of RAM could potentially be mapped using lowmem PTEs. Even allowing generous factor of 10 to account for other required lowmem allocations, generous slop to account for page sharing (which reduces the total amount of RAM mappable by a given number of PT pages) and other innacuracies in the estimations it would seem that even a 32G machine would not have a particularly pressing need for highmem PTEs. I think 32G could be considered to be at the upper bound of what might be sensible on a 32 bit machine (although I think in practice 64G is still supported). It's seems questionable if HIGHPTE is even a win for any amount of RAM you would sensibly run a 32 bit kernel on rather than going 64 bit. Signed-off-by: Ian Campbell <ian.campbell@citrix.com> LKML-Reference: <1266403090-20162-1-git-send-email-ian.campbell@citrix.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
139 lines
4 KiB
C
139 lines
4 KiB
C
#ifndef _ASM_X86_PGALLOC_H
|
|
#define _ASM_X86_PGALLOC_H
|
|
|
|
#include <linux/threads.h>
|
|
#include <linux/mm.h> /* for struct page */
|
|
#include <linux/pagemap.h>
|
|
|
|
static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
|
|
|
|
#ifdef CONFIG_PARAVIRT
|
|
#include <asm/paravirt.h>
|
|
#else
|
|
#define paravirt_pgd_alloc(mm) __paravirt_pgd_alloc(mm)
|
|
static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) {}
|
|
static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {}
|
|
static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {}
|
|
static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
|
|
unsigned long start, unsigned long count) {}
|
|
static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {}
|
|
static inline void paravirt_release_pte(unsigned long pfn) {}
|
|
static inline void paravirt_release_pmd(unsigned long pfn) {}
|
|
static inline void paravirt_release_pud(unsigned long pfn) {}
|
|
#endif
|
|
|
|
/*
|
|
* Flags to use when allocating a user page table page.
|
|
*/
|
|
extern gfp_t __userpte_alloc_gfp;
|
|
|
|
/*
|
|
* Allocate and free page tables.
|
|
*/
|
|
extern pgd_t *pgd_alloc(struct mm_struct *);
|
|
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
|
|
|
|
extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
|
|
extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
|
|
|
|
/* Should really implement gc for free page table pages. This could be
|
|
done with a reference count in struct page. */
|
|
|
|
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
|
{
|
|
BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
|
|
free_page((unsigned long)pte);
|
|
}
|
|
|
|
static inline void pte_free(struct mm_struct *mm, struct page *pte)
|
|
{
|
|
pgtable_page_dtor(pte);
|
|
__free_page(pte);
|
|
}
|
|
|
|
extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
|
|
|
|
static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
|
|
unsigned long address)
|
|
{
|
|
___pte_free_tlb(tlb, pte);
|
|
}
|
|
|
|
static inline void pmd_populate_kernel(struct mm_struct *mm,
|
|
pmd_t *pmd, pte_t *pte)
|
|
{
|
|
paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT);
|
|
set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
|
|
}
|
|
|
|
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
|
|
struct page *pte)
|
|
{
|
|
unsigned long pfn = page_to_pfn(pte);
|
|
|
|
paravirt_alloc_pte(mm, pfn);
|
|
set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
|
|
}
|
|
|
|
#define pmd_pgtable(pmd) pmd_page(pmd)
|
|
|
|
#if PAGETABLE_LEVELS > 2
|
|
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
|
|
{
|
|
return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
|
|
}
|
|
|
|
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
|
|
{
|
|
BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
|
|
free_page((unsigned long)pmd);
|
|
}
|
|
|
|
extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
|
|
|
|
static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
|
|
unsigned long adddress)
|
|
{
|
|
___pmd_free_tlb(tlb, pmd);
|
|
}
|
|
|
|
#ifdef CONFIG_X86_PAE
|
|
extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
|
|
#else /* !CONFIG_X86_PAE */
|
|
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
|
|
{
|
|
paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
|
|
set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
|
|
}
|
|
#endif /* CONFIG_X86_PAE */
|
|
|
|
#if PAGETABLE_LEVELS > 3
|
|
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
|
|
{
|
|
paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
|
|
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
|
|
}
|
|
|
|
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
|
|
{
|
|
return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
|
|
}
|
|
|
|
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
|
|
{
|
|
BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
|
|
free_page((unsigned long)pud);
|
|
}
|
|
|
|
extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
|
|
|
|
static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
|
|
unsigned long address)
|
|
{
|
|
___pud_free_tlb(tlb, pud);
|
|
}
|
|
|
|
#endif /* PAGETABLE_LEVELS > 3 */
|
|
#endif /* PAGETABLE_LEVELS > 2 */
|
|
|
|
#endif /* _ASM_X86_PGALLOC_H */
|