Advertisement
Guest User

Untitled

a guest
Apr 17th, 2021
149
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 8.90 KB | None | 0 0
  1. diff -pru a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
  2. --- a/Documentation/admin-guide/sysctl/vm.rst   2021-01-17 22:17:06.000000000 +0900
  3. +++ b/Documentation/admin-guide/sysctl/vm.rst   2021-04-17 13:40:37.994904703 +0900
  4. @@ -26,6 +26,8 @@ Currently, these files are in /proc/sys/
  5.  
  6.  - admin_reserve_kbytes
  7.  - block_dump
  8. +- clean_low_kbytes
  9. +- clean_min_kbytes
  10.  - compact_memory
  11.  - compaction_proactiveness
  12.  - compact_unevictable_allowed
  13. @@ -113,6 +115,41 @@ block_dump enables block I/O debugging w
  14.  information on block I/O debugging is in Documentation/admin-guide/laptops/laptop-mode.rst.
  15.  
  16.  
  17. +clean_low_kbytes
  18. +=====================
  19. +
  20. +This knob provides *best-effort* protection of clean file pages. The clean file
  21. +pages on the current node won't be reclaimed under memory pressure when their
  22. +amount is below vm.clean_low_kbytes *unless* we threaten to OOM or have no
  23. +free swap space or vm.swappiness=0.
  24. +
  25. +Protection of clean file pages may be used to prevent thrashing and
  26. +reduce I/O under low-memory conditions.
  27. +
  28. +Setting it to a high value may result in an early eviction of anonymous pages
  29. +into the swap space by attempting to hold the protected amount of clean file
  30. +pages in memory.
  31. +
  32. +The default value is defined by CONFIG_CLEAN_LOW_KBYTES.
  33. +
  34. +
  35. +clean_min_kbytes
  36. +=====================
  37. +
  38. +This knob provides *hard* protection of clean file pages. The clean file pages
  39. +on the current node won't be reclaimed under memory pressure when their amount
  40. +is below vm.clean_min_kbytes.
  41. +
  42. +Hard protection of clean file pages may be used to avoid high latency and
  43. +prevent livelock in near-OOM conditions.
  44. +
  45. +Setting it to a high value may result in an early out-of-memory condition due to
  46. +the inability to reclaim the protected amount of clean file pages when other
  47. +types of pages cannot be reclaimed.
  48. +
  49. +The default value is defined by CONFIG_CLEAN_MIN_KBYTES.
  50. +
  51. +
  52.  compact_memory
  53.  ==============
  54.  
  55. diff -pru a/include/linux/mm.h b/include/linux/mm.h
  56. --- a/include/linux/mm.h    2021-01-17 22:17:06.000000000 +0900
  57. +++ b/include/linux/mm.h    2021-04-17 14:01:31.460459997 +0900
  58. @@ -202,6 +202,10 @@ static inline void __mm_zero_struct_page
  59.  
  60.  extern int sysctl_max_map_count;
  61.  
  62. +extern unsigned long sysctl_clean_low_kbytes;
  63. +extern unsigned long sysctl_clean_min_kbytes;
  64. +extern unsigned long sysctl_anon_min_kbytes;
  65. +
  66.  extern unsigned long sysctl_user_reserve_kbytes;
  67.  extern unsigned long sysctl_admin_reserve_kbytes;
  68.  
  69. diff -pru a/kernel/sysctl.c b/kernel/sysctl.c
  70. --- a/kernel/sysctl.c   2021-01-17 22:17:06.000000000 +0900
  71. +++ b/kernel/sysctl.c   2021-04-17 14:02:40.014926511 +0900
  72. @@ -3083,6 +3083,27 @@ static struct ctl_table vm_table[] = {
  73.     },
  74.  #endif
  75.     {
  76. +       .procname   = "clean_low_kbytes",
  77. +       .data       = &sysctl_clean_low_kbytes,
  78. +       .maxlen     = sizeof(sysctl_clean_low_kbytes),
  79. +       .mode       = 0644,
  80. +       .proc_handler   = proc_doulongvec_minmax,
  81. +   },
  82. +   {
  83. +       .procname   = "clean_min_kbytes",
  84. +       .data       = &sysctl_clean_min_kbytes,
  85. +       .maxlen     = sizeof(sysctl_clean_min_kbytes),
  86. +       .mode       = 0644,
  87. +       .proc_handler   = proc_doulongvec_minmax,
  88. +   },
  89. +   {
  90. +       .procname   = "anon_min_kbytes",
  91. +       .data       = &sysctl_anon_min_kbytes,
  92. +       .maxlen     = sizeof(sysctl_anon_min_kbytes),
  93. +       .mode       = 0644,
  94. +       .proc_handler   = proc_doulongvec_minmax,
  95. +   },
  96. +   {
  97.         .procname   = "user_reserve_kbytes",
  98.         .data       = &sysctl_user_reserve_kbytes,
  99.         .maxlen     = sizeof(sysctl_user_reserve_kbytes),
  100. diff -pru a/mm/Kconfig b/mm/Kconfig
  101. --- a/mm/Kconfig    2021-01-17 22:17:06.000000000 +0900
  102. +++ b/mm/Kconfig    2021-04-17 14:00:30.806279244 +0900
  103. @@ -122,6 +122,49 @@ config SPARSEMEM_VMEMMAP
  104.       pfn_to_page and page_to_pfn operations.  This is the most
  105.       efficient option when sufficient kernel resources are available.
  106.  
  107. +config CLEAN_LOW_KBYTES
  108. +   int "Default value for vm.clean_low_kbytes"
  109. +   depends on SYSCTL
  110. +   default "0"
  111. +   help
  112. +     The vm.clean_low_kbytes sysctl knob provides *best-effort*
  113. +     protection of clean file pages. The clean file pages on the current
  114. +     node won't be reclaimed under memory pressure when their amount is
  115. +     below vm.clean_low_kbytes *unless* we threaten to OOM or have
  116. +     no free swap space or vm.swappiness=0.
  117. +
  118. +     Protection of clean file pages may be used to prevent thrashing and
  119. +     reduce I/O under low-memory conditions.
  120. +
  121. +     Setting it to a high value may result in an early eviction of anonymous
  122. +     pages into the swap space by attempting to hold the protected amount of
  123. +     clean file pages in memory.
  124. +
  125. +config CLEAN_MIN_KBYTES
  126. +   int "Default value for vm.clean_min_kbytes"
  127. +   depends on SYSCTL
  128. +   default "0"
  129. +   help
  130. +     The vm.clean_min_kbytes sysctl knob provides *hard* protection
  131. +     of clean file pages. The clean file pages on the current node won't be
  132. +     reclaimed under memory pressure when their amount is below
  133. +     vm.clean_min_kbytes.
  134. +
  135. +     Hard protection of clean file pages may be used to avoid high latency and
  136. +     prevent livelock in near-OOM conditions.
  137. +
  138. +     Setting it to a high value may result in an early out-of-memory condition
  139. +     due to the inability to reclaim the protected amount of clean file pages
  140. +     when other types of pages cannot be reclaimed.
  141. +
  142. +config ANON_MIN_KBYTES
  143. +   int "Default value for vm.anon_min_kbytes"
  144. +   depends on SYSCTL
  145. +   default "0"
  146. +   help
  147. +     Anonymous pages on the current node are not reclaimed while their amount is below vm.anon_min_kbytes.
  148. +
  149. +
  150.  config HAVE_MEMBLOCK_PHYS_MAP
  151.     bool
  152.  
  153. diff -pru a/mm/vmscan.c b/mm/vmscan.c
  154. --- a/mm/vmscan.c   2021-01-17 22:17:06.000000000 +0900
  155. +++ b/mm/vmscan.c   2021-04-17 13:59:37.252355264 +0900
  156. @@ -120,6 +120,23 @@ struct scan_control {
  157.     /* The file pages on the current node are dangerously low */
  158.     unsigned int file_is_tiny:1;
  159.  
  160. +   /*
  161. +    * The clean file pages on the current node won't be reclaimed when
  162. +    * their amount is below vm.clean_low_kbytes *unless* we threaten
  163. +    * to OOM or have no free swap space or vm.swappiness=0.
  164. +    */
  165. +   unsigned int clean_below_low:1;
  166. +
  167. +   /*
  168. +    * The clean file pages on the current node won't be reclaimed when
  169. +    * their amount is below vm.clean_min_kbytes.
  170. +    */
  171. +   unsigned int clean_below_min:1;
  172. +
  173. +   unsigned int anon_below_min:1;
  174. +
  175. +
  176. +
  177.     /* Allocation order */
  178.     s8 order;
  179.  
  180. @@ -166,6 +183,22 @@ struct scan_control {
  181.  #define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
  182.  #endif
  183.  
  184. +#if CONFIG_CLEAN_LOW_KBYTES < 0
  185. +#error "CONFIG_CLEAN_LOW_KBYTES must be >= 0"
  186. +#endif
  187. +
  188. +#if CONFIG_CLEAN_MIN_KBYTES < 0
  189. +#error "CONFIG_CLEAN_MIN_KBYTES must be >= 0"
  190. +#endif
  191. +
  192. +#if CONFIG_ANON_MIN_KBYTES < 0
  193. +#error "CONFIG_ANON_MIN_KBYTES must be >= 0"
  194. +#endif
  195. +
  196. +unsigned long sysctl_clean_low_kbytes __read_mostly = CONFIG_CLEAN_LOW_KBYTES;
  197. +unsigned long sysctl_clean_min_kbytes __read_mostly = CONFIG_CLEAN_MIN_KBYTES;
  198. +unsigned long sysctl_anon_min_kbytes __read_mostly = CONFIG_ANON_MIN_KBYTES;
  199. +
  200.  /*
  201.   * From 0 .. 200.  Higher means more swappy.
  202.   */
  203. @@ -2279,6 +2312,16 @@ static void get_scan_count(struct lruvec
  204.     }
  205.  
  206.     /*
  207. +    * Force-scan anon if clean file pages are under vm.clean_min_kbytes
  208. +    * or vm.clean_low_kbytes (unless the swappiness setting
  209. +    * disagrees with swapping).
  210. +    */
  211. +   if ((sc->clean_below_low || sc->clean_below_min) && swappiness) {
  212. +       scan_balance = SCAN_ANON;
  213. +       goto out;
  214. +   }
  215. +
  216. +   /*
  217.      * If there is enough inactive page cache, we do not reclaim
  218.      * anything from the anonymous working right now.
  219.      */
  220. @@ -2414,6 +2457,14 @@ out:
  221.             BUG();
  222.         }
  223.  
  224. +       if (file) {
  225. +           if (sc->clean_below_min)
  226. +               scan = 0;
  227. +       } else {
  228. +           if (sc->anon_below_min)
  229. +               scan = 0;
  230. +       }
  231. +
  232.         nr[lru] = scan;
  233.     }
  234.  }
  235. @@ -2764,6 +2815,48 @@ again:
  236.             anon >> sc->priority;
  237.     }
  238.  
  239. +   /*
  240. +    * Check the number of clean file pages to protect them from
  241. +    * reclaim if their amount is below the specified thresholds.
  242. +    */
  243. +   if (sysctl_clean_low_kbytes || sysctl_clean_min_kbytes) {
  244. +       unsigned long reclaimable_file, dirty, clean;
  245. +
  246. +       reclaimable_file =
  247. +           node_page_state(pgdat, NR_ACTIVE_FILE) +
  248. +           node_page_state(pgdat, NR_INACTIVE_FILE) +
  249. +           node_page_state(pgdat, NR_ISOLATED_FILE);
  250. +       dirty = node_page_state(pgdat, NR_FILE_DIRTY);
  251. +       /*
  252. +        * node_page_state() sum can go out of sync since
  253. +        * all the values are not read at once.
  254. +        */
  255. +       if (likely(reclaimable_file > dirty))
  256. +           clean = (reclaimable_file - dirty) << (PAGE_SHIFT - 10);
  257. +       else
  258. +           clean = 0;
  259. +
  260. +       sc->clean_below_low = clean < sysctl_clean_low_kbytes;
  261. +       sc->clean_below_min = clean < sysctl_clean_min_kbytes;
  262. +   } else {
  263. +       sc->clean_below_low = false;
  264. +       sc->clean_below_min = false;
  265. +   }
  266. +
  267. +
  268. +   if (sysctl_anon_min_kbytes) {
  269. +       unsigned long reclaimable_anon;
  270. +       reclaimable_anon =
  271. +           node_page_state(pgdat, NR_ACTIVE_ANON) +
  272. +           node_page_state(pgdat, NR_INACTIVE_ANON) +
  273. +           node_page_state(pgdat, NR_ISOLATED_ANON);
  274. +       reclaimable_anon <<= (PAGE_SHIFT - 10);
  275. +       sc->anon_below_min = reclaimable_anon < sysctl_anon_min_kbytes;
  276. +   } else {
  277. +       sc->anon_below_min = false;
  278. +   }
  279. +
  280. +
  281.     shrink_node_memcgs(pgdat, sc);
  282.  
  283.     if (reclaim_state) {
  284.  
  285.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement