author: David Hildenbrand <david@redhat.com> 2021-09-07 19:55:48 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2021-09-08 11:50:23 -0700
commit: 3fcebf90209a7f52d384ad7701425aa91be309ab
parent: 445fcf7c721450dd1d4ec6c217b3c6a932602a44
Commit Summary:
Diffstat:
1 file changed, 51 insertions, 4 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 248e2ba4ac59..b80fb8164fb8 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -752,11 +752,44 @@ static void auto_movable_stats_account_zone(struct auto_movable_stats *stats,
#endif /* CONFIG_CMA */
}
}
+struct auto_movable_group_stats { /* totals accumulated by auto_movable_stats_account_group() */
+ unsigned long movable_pages; /* running sum of each accounted group's present_movable_pages */
+ unsigned long req_kernel_early_pages; /* running sum of the positive per-group kernel-page requirements */
+};
-static bool auto_movable_can_online_movable(int nid, unsigned long nr_pages)
+static int auto_movable_stats_account_group(struct memory_group *group,
+ void *arg)
+{
+ const int ratio = READ_ONCE(auto_movable_ratio);
+ struct auto_movable_group_stats *stats = arg;
+ long pages; /* signed: the subtraction below may yield a value <= 0 */
+
+ /*
+ * Accumulates one memory group's counters into the
+ * struct auto_movable_group_stats that arg points to; returns 0 on
+ * every path. auto_movable_can_online_movable() hands this function
+ * and a stats pointer to walk_dynamic_memory_groups().
+ * NOTE(review): presumably that helper invokes it once per group and
+ * forwards the pointer as arg -- confirm against the helper.
+ *
+ * We don't support modifying the config while the auto-movable online
+ * policy is already enabled. Just avoid the division by zero below.
+ * In that case the group is skipped entirely: neither counter in
+ * stats is touched.
+ */
+ if (!ratio)
+ return 0;
+
+ /*
+ * Calculate how many early kernel pages this group requires to
+ * satisfy the configured zone ratio: present_movable_pages * 100 /
+ * ratio, minus the group's present_kernel_pages.
+ */
+ pages = group->present_movable_pages * 100 / ratio;
+ pages -= group->present_kernel_pages;
+
+ if (pages > 0) /* only a shortfall adds to the requirement */
+ stats->req_kernel_early_pages += pages;
+ stats->movable_pages += group->present_movable_pages; /* unconditional: not governed by the if above */
+ return 0;
+}
+
+static bool auto_movable_can_online_movable(int nid, struct memory_group *group,
+ unsigned long nr_pages)
{
- struct auto_movable_stats stats = {};
unsigned long kernel_early_pages, movable_pages;
+ struct auto_movable_group_stats group_stats = {};
+ struct auto_movable_stats stats = {};
pg_data_t *pgdat = NODE_DATA(nid);
struct zone *zone;
int i;
@@ -777,6 +810,21 @@ static bool auto_movable_can_online_movable(int nid, unsigned long nr_pages)
kernel_early_pages = stats.kernel_early_pages;
movable_pages = stats.movable_pages;
+ /*
+ * Kernel memory inside a dynamic memory group allows for more MOVABLE
+ * memory within the same group. Remove the effect of all but the
+ * current group from the stats.
+ */
+ walk_dynamic_memory_groups(nid, auto_movable_stats_account_group,
+ group, &group_stats);
+ if (kernel_early_pages <= group_stats.req_kernel_early_pages)
+ return false;
+ kernel_early_pages -= group_stats.req_kernel_early_pages;
+ movable_pages -= group_stats.movable_pages;
+
+ if (group && group->is_dynamic)
+ kernel_early_pages += group->present_kernel_pages;
+
/*
* Test if we could online the given number of pages to ZONE_MOVABLE
* and still stay in the configured ratio.
@@ -834,6 +882,10 @@ static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn
* with unmovable allocations). While there are corner cases where it might
* still work, it is barely relevant in practice.
*
+ * Exceptions are dynamic memory groups, which allow for more MOVABLE
+ * memory within the same memory group -- because in that case, there is
+ * coordination within the single memory device managed by a single driver.
+ *
* We rely on "present pages" instead of "managed pages", as the latter is
* highly unreliable and dynamic in virtualized environments, and does not
* consider boot time allocations. For example, memory ballooning adjusts the
@@ -899,12 +951,12 @@ static struct zone *auto_movable_zone_for_pfn(int nid,
* nobody interferes, all will be MOVABLE if possible.
*/
nr_pages = max_pages - online_pages;
- if (!auto_movable_can_online_movable(NUMA_NO_NODE, nr_pages))
+ if (!auto_movable_can_online_movable(NUMA_NO_NODE, group, nr_pages))
goto kernel_zone;
#ifdef CONFIG_NUMA
if (auto_movable_numa_aware &&
- !auto_movable_can_online_movable(nid, nr_pages))
+ !auto_movable_can_online_movable(nid, group, nr_pages))
goto kernel_zone;
#endif /* CONFIG_NUMA */