linux/include/trace/events/vmscan.h
KOSAKI Motohiro 7d3579e8e6 vmscan: narrow the scenarios in whcih lumpy reclaim uses synchrounous reclaim
shrink_page_list() can decide to give up reclaiming a page under a
number of conditions such as

  1. trylock_page() failure
  2. page is unevictable
  3. zone reclaim and page is mapped
  4. PageWriteback() is true
  5. page is swapbacked and swap is full
  6. add_to_swap() failure
  7. page is dirty and gfpmask don't have GFP_IO, GFP_FS
  8. page is pinned
  9. IO queue is congested
 10. pageout() start IO, but not finished

With lumpy reclaim, failures result in entering synchronous lumpy reclaim
but this can be unnecessary.  In cases (2), (3), (5), (6), (7) and (8),
there is no point retrying.  This patch causes lumpy reclaim to abort when
it is known it will fail.

Case (9) is more interesting. current behavior is,
  1. start shrink_page_list(async)
  2. found queue_congested()
  3. skip pageout write
  4. still start shrink_page_list(sync)
  5. wait on a lot of pages
  6. again, found queue_congested()
  7. give up pageout write again

So, it's useless time wasting.  However, just skipping page reclaim is
also notgood as x86 allocating a huge page needs 512 pages for example.
It can have more dirty pages than queue congestion threshold (~=128).

After this patch, pageout() behaves as follows;

 - If order > PAGE_ALLOC_COSTLY_ORDER
	Ignore queue congestion always.
 - If order <= PAGE_ALLOC_COSTLY_ORDER
	skip write page and disable lumpy reclaim.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2010-10-26 16:52:07 -07:00

317 lines
7.3 KiB
C

#undef TRACE_SYSTEM
#define TRACE_SYSTEM vmscan
#if !defined(_TRACE_VMSCAN_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_VMSCAN_H
#include <linux/types.h>
#include <linux/tracepoint.h>
#include "gfpflags.h"
#define RECLAIM_WB_ANON 0x0001u
#define RECLAIM_WB_FILE 0x0002u
#define RECLAIM_WB_MIXED 0x0010u
#define RECLAIM_WB_SYNC 0x0004u
#define RECLAIM_WB_ASYNC 0x0008u
#define show_reclaim_flags(flags) \
(flags) ? __print_flags(flags, "|", \
{RECLAIM_WB_ANON, "RECLAIM_WB_ANON"}, \
{RECLAIM_WB_FILE, "RECLAIM_WB_FILE"}, \
{RECLAIM_WB_MIXED, "RECLAIM_WB_MIXED"}, \
{RECLAIM_WB_SYNC, "RECLAIM_WB_SYNC"}, \
{RECLAIM_WB_ASYNC, "RECLAIM_WB_ASYNC"} \
) : "RECLAIM_WB_NONE"
#define trace_reclaim_flags(page, sync) ( \
(page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \
(sync == LUMPY_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \
)
#define trace_shrink_flags(file, sync) ( \
(sync == LUMPY_MODE_SYNC ? RECLAIM_WB_MIXED : \
(file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON)) | \
(sync == LUMPY_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \
)
TRACE_EVENT(mm_vmscan_kswapd_sleep,
TP_PROTO(int nid),
TP_ARGS(nid),
TP_STRUCT__entry(
__field( int, nid )
),
TP_fast_assign(
__entry->nid = nid;
),
TP_printk("nid=%d", __entry->nid)
);
TRACE_EVENT(mm_vmscan_kswapd_wake,
TP_PROTO(int nid, int order),
TP_ARGS(nid, order),
TP_STRUCT__entry(
__field( int, nid )
__field( int, order )
),
TP_fast_assign(
__entry->nid = nid;
__entry->order = order;
),
TP_printk("nid=%d order=%d", __entry->nid, __entry->order)
);
TRACE_EVENT(mm_vmscan_wakeup_kswapd,
TP_PROTO(int nid, int zid, int order),
TP_ARGS(nid, zid, order),
TP_STRUCT__entry(
__field( int, nid )
__field( int, zid )
__field( int, order )
),
TP_fast_assign(
__entry->nid = nid;
__entry->zid = zid;
__entry->order = order;
),
TP_printk("nid=%d zid=%d order=%d",
__entry->nid,
__entry->zid,
__entry->order)
);
DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_begin_template,
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),
TP_ARGS(order, may_writepage, gfp_flags),
TP_STRUCT__entry(
__field( int, order )
__field( int, may_writepage )
__field( gfp_t, gfp_flags )
),
TP_fast_assign(
__entry->order = order;
__entry->may_writepage = may_writepage;
__entry->gfp_flags = gfp_flags;
),
TP_printk("order=%d may_writepage=%d gfp_flags=%s",
__entry->order,
__entry->may_writepage,
show_gfp_flags(__entry->gfp_flags))
);
DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_direct_reclaim_begin,
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),
TP_ARGS(order, may_writepage, gfp_flags)
);
DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_reclaim_begin,
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),
TP_ARGS(order, may_writepage, gfp_flags)
);
DEFINE_EVENT(mm_vmscan_direct_reclaim_begin_template, mm_vmscan_memcg_softlimit_reclaim_begin,
TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),
TP_ARGS(order, may_writepage, gfp_flags)
);
DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_end_template,
TP_PROTO(unsigned long nr_reclaimed),
TP_ARGS(nr_reclaimed),
TP_STRUCT__entry(
__field( unsigned long, nr_reclaimed )
),
TP_fast_assign(
__entry->nr_reclaimed = nr_reclaimed;
),
TP_printk("nr_reclaimed=%lu", __entry->nr_reclaimed)
);
DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_direct_reclaim_end,
TP_PROTO(unsigned long nr_reclaimed),
TP_ARGS(nr_reclaimed)
);
DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_reclaim_end,
TP_PROTO(unsigned long nr_reclaimed),
TP_ARGS(nr_reclaimed)
);
DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_reclaim_end,
TP_PROTO(unsigned long nr_reclaimed),
TP_ARGS(nr_reclaimed)
);
DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
TP_PROTO(int order,
unsigned long nr_requested,
unsigned long nr_scanned,
unsigned long nr_taken,
unsigned long nr_lumpy_taken,
unsigned long nr_lumpy_dirty,
unsigned long nr_lumpy_failed,
int isolate_mode),
TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode),
TP_STRUCT__entry(
__field(int, order)
__field(unsigned long, nr_requested)
__field(unsigned long, nr_scanned)
__field(unsigned long, nr_taken)
__field(unsigned long, nr_lumpy_taken)
__field(unsigned long, nr_lumpy_dirty)
__field(unsigned long, nr_lumpy_failed)
__field(int, isolate_mode)
),
TP_fast_assign(
__entry->order = order;
__entry->nr_requested = nr_requested;
__entry->nr_scanned = nr_scanned;
__entry->nr_taken = nr_taken;
__entry->nr_lumpy_taken = nr_lumpy_taken;
__entry->nr_lumpy_dirty = nr_lumpy_dirty;
__entry->nr_lumpy_failed = nr_lumpy_failed;
__entry->isolate_mode = isolate_mode;
),
TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu contig_taken=%lu contig_dirty=%lu contig_failed=%lu",
__entry->isolate_mode,
__entry->order,
__entry->nr_requested,
__entry->nr_scanned,
__entry->nr_taken,
__entry->nr_lumpy_taken,
__entry->nr_lumpy_dirty,
__entry->nr_lumpy_failed)
);
DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate,
TP_PROTO(int order,
unsigned long nr_requested,
unsigned long nr_scanned,
unsigned long nr_taken,
unsigned long nr_lumpy_taken,
unsigned long nr_lumpy_dirty,
unsigned long nr_lumpy_failed,
int isolate_mode),
TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode)
);
DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate,
TP_PROTO(int order,
unsigned long nr_requested,
unsigned long nr_scanned,
unsigned long nr_taken,
unsigned long nr_lumpy_taken,
unsigned long nr_lumpy_dirty,
unsigned long nr_lumpy_failed,
int isolate_mode),
TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode)
);
TRACE_EVENT(mm_vmscan_writepage,
TP_PROTO(struct page *page,
int reclaim_flags),
TP_ARGS(page, reclaim_flags),
TP_STRUCT__entry(
__field(struct page *, page)
__field(int, reclaim_flags)
),
TP_fast_assign(
__entry->page = page;
__entry->reclaim_flags = reclaim_flags;
),
TP_printk("page=%p pfn=%lu flags=%s",
__entry->page,
page_to_pfn(__entry->page),
show_reclaim_flags(__entry->reclaim_flags))
);
TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
TP_PROTO(int nid, int zid,
unsigned long nr_scanned, unsigned long nr_reclaimed,
int priority, int reclaim_flags),
TP_ARGS(nid, zid, nr_scanned, nr_reclaimed, priority, reclaim_flags),
TP_STRUCT__entry(
__field(int, nid)
__field(int, zid)
__field(unsigned long, nr_scanned)
__field(unsigned long, nr_reclaimed)
__field(int, priority)
__field(int, reclaim_flags)
),
TP_fast_assign(
__entry->nid = nid;
__entry->zid = zid;
__entry->nr_scanned = nr_scanned;
__entry->nr_reclaimed = nr_reclaimed;
__entry->priority = priority;
__entry->reclaim_flags = reclaim_flags;
),
TP_printk("nid=%d zid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s",
__entry->nid, __entry->zid,
__entry->nr_scanned, __entry->nr_reclaimed,
__entry->priority,
show_reclaim_flags(__entry->reclaim_flags))
);
#endif /* _TRACE_VMSCAN_H */
/* This part must be outside protection */
#include <trace/define_trace.h>