- diff -pru a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
- --- a/Documentation/admin-guide/sysctl/vm.rst 2021-01-17 22:17:06.000000000 +0900
- +++ b/Documentation/admin-guide/sysctl/vm.rst 2021-04-17 13:40:37.994904703 +0900
- @@ -26,6 +26,8 @@ Currently, these files are in /proc/sys/
- - admin_reserve_kbytes
- - block_dump
- +- clean_low_kbytes
- +- clean_min_kbytes
- - compact_memory
- - compaction_proactiveness
- - compact_unevictable_allowed
- @@ -113,6 +115,41 @@ block_dump enables block I/O debugging w
- information on block I/O debugging is in Documentation/admin-guide/laptops/laptop-mode.rst.
- +clean_low_kbytes
- +=====================
- +
- +This knob provides *best-effort* protection of clean file pages. The clean file
- +pages on the current node won't be reclaimed under memory pressure when their
- +amount is below vm.clean_low_kbytes *unless* we threaten to OOM or have no
- +free swap space or vm.swappiness=0.
- +
- +Protection of clean file pages may be used to prevent thrashing and
- +reduce I/O under low-memory conditions.
- +
- +Setting it to a high value may result in an early eviction of anonymous pages
- +into the swap space by attempting to hold the protected amount of clean file
- +pages in memory.
- +
- +The default value is defined by CONFIG_CLEAN_LOW_KBYTES.
- +
- +
- +clean_min_kbytes
- +=====================
- +
- +This knob provides *hard* protection of clean file pages. The clean file pages
- +on the current node won't be reclaimed under memory pressure when their amount
- +is below vm.clean_min_kbytes.
- +
- +Hard protection of clean file pages may be used to avoid high latency and
- +prevent livelock in near-OOM conditions.
- +
- +Setting it to a high value may result in an early out-of-memory condition due to
- +the inability to reclaim the protected amount of clean file pages when other
- +types of pages cannot be reclaimed.
- +
- +The default value is defined by CONFIG_CLEAN_MIN_KBYTES.
- +
- +
- compact_memory
- ==============
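(Not part of the patch, for context only.) Once the sysctl entries added below
in kernel/sysctl.c are registered, the two knobs documented above show up as
/proc/sys/vm/clean_low_kbytes and /proc/sys/vm/clean_min_kbytes and accept an
unsigned long value in KiB. A minimal userspace sketch of tuning them could
look like the following; the threshold values and the write_vm_knob() helper
are illustrative assumptions, not recommendations.

#include <stdio.h>

/* Write a KiB value to /proc/sys/vm/<name>; proc_doulongvec_minmax on the
 * kernel side parses it as a decimal unsigned long. */
static int write_vm_knob(const char *name, unsigned long kbytes)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/sys/vm/%s", name);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%lu\n", kbytes);
	return fclose(f);
}

int main(void)
{
	/* Best-effort protection: try to keep ~256 MiB of clean file pages. */
	if (write_vm_knob("clean_low_kbytes", 262144))
		perror("vm.clean_low_kbytes");
	/* Hard protection: never reclaim below ~64 MiB of clean file pages. */
	if (write_vm_knob("clean_min_kbytes", 65536))
		perror("vm.clean_min_kbytes");
	return 0;
}

The same tuning is normally done with sysctl(8) or an /etc/sysctl.d drop-in;
the sketch only illustrates the unit (KiB) and the write interface.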
- diff -pru a/include/linux/mm.h b/include/linux/mm.h
- --- a/include/linux/mm.h 2021-01-17 22:17:06.000000000 +0900
- +++ b/include/linux/mm.h 2021-04-17 14:01:31.460459997 +0900
- @@ -202,6 +202,10 @@ static inline void __mm_zero_struct_page
- extern int sysctl_max_map_count;
- +extern unsigned long sysctl_clean_low_kbytes;
- +extern unsigned long sysctl_clean_min_kbytes;
- +extern unsigned long sysctl_anon_min_kbytes;
- +
- extern unsigned long sysctl_user_reserve_kbytes;
- extern unsigned long sysctl_admin_reserve_kbytes;
- diff -pru a/kernel/sysctl.c b/kernel/sysctl.c
- --- a/kernel/sysctl.c 2021-01-17 22:17:06.000000000 +0900
- +++ b/kernel/sysctl.c 2021-04-17 14:02:40.014926511 +0900
- @@ -3083,6 +3083,27 @@ static struct ctl_table vm_table[] = {
- },
- #endif
- {
- + .procname = "clean_low_kbytes",
- + .data = &sysctl_clean_low_kbytes,
- + .maxlen = sizeof(sysctl_clean_low_kbytes),
- + .mode = 0644,
- + .proc_handler = proc_doulongvec_minmax,
- + },
- + {
- + .procname = "clean_min_kbytes",
- + .data = &sysctl_clean_min_kbytes,
- + .maxlen = sizeof(sysctl_clean_min_kbytes),
- + .mode = 0644,
- + .proc_handler = proc_doulongvec_minmax,
- + },
- + {
- + .procname = "anon_min_kbytes",
- + .data = &sysctl_anon_min_kbytes,
- + .maxlen = sizeof(sysctl_anon_min_kbytes),
- + .mode = 0644,
- + .proc_handler = proc_doulongvec_minmax,
- + },
- + {
- .procname = "user_reserve_kbytes",
- .data = &sysctl_user_reserve_kbytes,
- .maxlen = sizeof(sysctl_user_reserve_kbytes),
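(Side note, not a change to the patch.) The three new entries above use
proc_doulongvec_minmax() without .extra1/.extra2, so any unsigned long a
privileged writer supplies is accepted as-is. For that handler, .extra1 and
.extra2 are optional pointers to unsigned long lower/upper bounds; if clamping
were desired, a bounded entry could look roughly like the sketch below, where
the bound variables and the 16 GiB cap are hypothetical.

/* Hypothetical bounded variant of one entry, for illustration only. */
static unsigned long clean_kbytes_floor;			/* 0 KiB */
static unsigned long clean_kbytes_ceiling = 16UL << 20;	/* 16 GiB, in KiB */

static struct ctl_table clean_low_kbytes_bounded[] = {
	{
		.procname	= "clean_low_kbytes",
		.data		= &sysctl_clean_low_kbytes,
		.maxlen		= sizeof(sysctl_clean_low_kbytes),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
		.extra1		= &clean_kbytes_floor,		/* minimum accepted value */
		.extra2		= &clean_kbytes_ceiling,	/* maximum accepted value */
	},
	{ }
};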
- diff -pru a/mm/Kconfig b/mm/Kconfig
- --- a/mm/Kconfig 2021-01-17 22:17:06.000000000 +0900
- +++ b/mm/Kconfig 2021-04-17 14:00:30.806279244 +0900
- @@ -122,6 +122,49 @@ config SPARSEMEM_VMEMMAP
- pfn_to_page and page_to_pfn operations. This is the most
- efficient option when sufficient kernel resources are available.
- +config CLEAN_LOW_KBYTES
- + int "Default value for vm.clean_low_kbytes"
- + depends on SYSCTL
- + default "0"
- + help
- + The vm.clean_low_kbytes sysctl knob provides *best-effort*
- + protection of clean file pages. The clean file pages on the current
- + node won't be reclaimed under memory pressure when their amount is
- + below vm.clean_low_kbytes *unless* we threaten to OOM or have
- + no free swap space or vm.swappiness=0.
- +
- + Protection of clean file pages may be used to prevent thrashing and
- + reduce I/O under low-memory conditions.
- +
- + Setting it to a high value may result in an early eviction of anonymous
- + pages into the swap space by attempting to hold the protected amount of
- + clean file pages in memory.
- +
- +config CLEAN_MIN_KBYTES
- + int "Default value for vm.clean_min_kbytes"
- + depends on SYSCTL
- + default "0"
- + help
- + The vm.clean_min_kbytes sysctl knob provides *hard* protection
- + of clean file pages. The clean file pages on the current node won't be
- + reclaimed under memory pressure when their amount is below
- + vm.clean_min_kbytes.
- +
- + Hard protection of clean file pages may be used to avoid high latency and
- + prevent livelock in near-OOM conditions.
- +
- + Setting it to a high value may result in an early out-of-memory condition
- + due to the inability to reclaim the protected amount of clean file pages
- + when other types of pages cannot be reclaimed.
- +
- +config ANON_MIN_KBYTES
- + int "Default value for vm.anon_min_kbytes"
- + depends on SYSCTL
- + default "0"
- + help
- + The vm.anon_min_kbytes sysctl knob provides *hard* protection of
- + anonymous pages. The anonymous pages on the current node won't be
- + reclaimed under memory pressure when their amount is below
- + vm.anon_min_kbytes.
- +
- + Setting it to a high value may result in an early out-of-memory
- + condition due to the inability to reclaim the protected amount of
- + anonymous pages when other types of pages cannot be reclaimed.
- +
- config HAVE_MEMBLOCK_PHYS_MAP
- bool
- diff -pru a/mm/vmscan.c b/mm/vmscan.c
- --- a/mm/vmscan.c 2021-01-17 22:17:06.000000000 +0900
- +++ b/mm/vmscan.c 2021-04-17 13:59:37.252355264 +0900
- @@ -120,6 +120,23 @@ struct scan_control {
- /* The file pages on the current node are dangerously low */
- unsigned int file_is_tiny:1;
- + /*
- + * The clean file pages on the current node won't be reclaimed when
- + * their amount is below vm.clean_low_kbytes *unless* we threaten
- + * to OOM or have no free swap space or vm.swappiness=0.
- + */
- + unsigned int clean_below_low:1;
- +
- + /*
- + * The clean file pages on the current node won't be reclaimed when
- + * their amount is below vm.clean_min_kbytes.
- + */
- + unsigned int clean_below_min:1;
- +
- + /*
- + * The anonymous pages on the current node won't be reclaimed when
- + * their amount is below vm.anon_min_kbytes.
- + */
- + unsigned int anon_below_min:1;
- +
- /* Allocation order */
- s8 order;
- @@ -166,6 +183,22 @@ struct scan_control {
- #define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
- #endif
- +#if CONFIG_CLEAN_LOW_KBYTES < 0
- +#error "CONFIG_CLEAN_LOW_KBYTES must be >= 0"
- +#endif
- +
- +#if CONFIG_CLEAN_MIN_KBYTES < 0
- +#error "CONFIG_CLEAN_MIN_KBYTES must be >= 0"
- +#endif
- +
- +#if CONFIG_ANON_MIN_KBYTES < 0
- +#error "CONFIG_ANON_MIN_KBYTES must be >= 0"
- +#endif
- +
- +unsigned long sysctl_clean_low_kbytes __read_mostly = CONFIG_CLEAN_LOW_KBYTES;
- +unsigned long sysctl_clean_min_kbytes __read_mostly = CONFIG_CLEAN_MIN_KBYTES;
- +unsigned long sysctl_anon_min_kbytes __read_mostly = CONFIG_ANON_MIN_KBYTES;
- +
- /*
- * From 0 .. 200. Higher means more swappy.
- */
- @@ -2279,6 +2312,16 @@ static void get_scan_count(struct lruvec
- }
- /*
- + * Force-scan anon if clean file pages are under vm.clean_min_kbytes
- + * or vm.clean_low_kbytes (unless the swappiness setting
- + * disagrees with swapping).
- + */
- + if ((sc->clean_below_low || sc->clean_below_min) && swappiness) {
- + scan_balance = SCAN_ANON;
- + goto out;
- + }
- +
- + /*
- * If there is enough inactive page cache, we do not reclaim
- * anything from the anonymous working right now.
- */
- @@ -2414,6 +2457,14 @@ out:
- BUG();
- }
- + if (file) {
- + if (sc->clean_below_min)
- + scan = 0;
- + } else {
- + if (sc->anon_below_min)
- + scan = 0;
- + }
- +
- nr[lru] = scan;
- }
- }
- @@ -2764,6 +2815,48 @@ again:
- anon >> sc->priority;
- }
- + /*
- + * Check the amount of clean file pages and protect them from
- + * being reclaimed when it is below the configured thresholds.
- + */
- + if (sysctl_clean_low_kbytes || sysctl_clean_min_kbytes) {
- + unsigned long reclaimable_file, dirty, clean;
- +
- + reclaimable_file =
- + node_page_state(pgdat, NR_ACTIVE_FILE) +
- + node_page_state(pgdat, NR_INACTIVE_FILE) +
- + node_page_state(pgdat, NR_ISOLATED_FILE);
- + dirty = node_page_state(pgdat, NR_FILE_DIRTY);
- + /*
- + * node_page_state() sum can go out of sync since
- + * all the values are not read at once.
- + */
- + if (likely(reclaimable_file > dirty))
- + clean = (reclaimable_file - dirty) << (PAGE_SHIFT - 10);
- + else
- + clean = 0;
- +
- + sc->clean_below_low = clean < sysctl_clean_low_kbytes;
- + sc->clean_below_min = clean < sysctl_clean_min_kbytes;
- + } else {
- + sc->clean_below_low = false;
- + sc->clean_below_min = false;
- + }
- +
- + if (sysctl_anon_min_kbytes) {
- + unsigned long reclaimable_anon;
- + reclaimable_anon =
- + node_page_state(pgdat, NR_ACTIVE_ANON) +
- + node_page_state(pgdat, NR_INACTIVE_ANON) +
- + node_page_state(pgdat, NR_ISOLATED_ANON);
- + reclaimable_anon <<= (PAGE_SHIFT - 10);
- + sc->anon_below_min = reclaimable_anon < sysctl_anon_min_kbytes;
- + } else {
- + sc->anon_below_min = false;
- + }
- +
- shrink_node_memcgs(pgdat, sc);
- if (reclaim_state) {
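(Not part of the patch.) For a feel of the arithmetic in the last hunk: the
kernel sums the node's active, inactive and isolated file pages, subtracts the
dirty pages, and shifts by (PAGE_SHIFT - 10) to express the clean amount in
KiB before comparing it against the two thresholds. The sketch below mirrors
that calculation from userspace with the machine-wide counters in /proc/vmstat
(the patch works per node, so this is only an approximation); the vmstat_val()
helper is an assumption made for the illustration.

#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Return the value of one /proc/vmstat counter (in pages), or 0 if absent. */
static unsigned long vmstat_val(const char *key)
{
	char line[128];
	unsigned long val = 0;
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f)
		return 0;
	while (fgets(line, sizeof(line), f)) {
		size_t n = strlen(key);

		if (!strncmp(line, key, n) && line[n] == ' ') {
			sscanf(line + n, "%lu", &val);
			break;
		}
	}
	fclose(f);
	return val;
}

int main(void)
{
	/* pages -> KiB, the userspace equivalent of "<< (PAGE_SHIFT - 10)" */
	unsigned long kib_per_page = sysconf(_SC_PAGESIZE) / 1024;
	unsigned long reclaimable_file = vmstat_val("nr_active_file") +
					 vmstat_val("nr_inactive_file") +
					 vmstat_val("nr_isolated_file");
	unsigned long dirty = vmstat_val("nr_dirty");
	unsigned long clean_kib = reclaimable_file > dirty ?
				  (reclaimable_file - dirty) * kib_per_page : 0;

	printf("approx. clean file pages: %lu KiB\n", clean_kib);
	return 0;
}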