VisionFive2 Linux kernel

StarFive Tech Linux Kernel for VisionFive (JH7110) boards (mirror)

More than 9999 Commits   33 Branches   57 Tags
author: Robert Richter <rrichter@marvell.com> 2019-11-05 20:07:51 +0000 committer: Borislav Petkov <bp@suse.de> 2019-11-08 16:28:28 +0100 commit: 23f61b9fc5cc10d87f66e50518707eec2a0fbda1 parent: 582f94b5900a9bccca993dc16ca77364a0aa12a9
Commit Summary:
EDAC/ghes: Fix locking and memory barrier issues
Diffstat:
1 file changed, 57 insertions, 24 deletions
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 0bb62857ffb2..f6f6a688c009 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -26,9 +26,18 @@ struct ghes_edac_pvt {
 	char msg[80];
 };
 
-static atomic_t ghes_init = ATOMIC_INIT(0);
+static refcount_t ghes_refcount = REFCOUNT_INIT(0);
+
+/*
+ * Access to ghes_pvt must be protected by ghes_lock. The spinlock
+ * also provides the necessary (implicit) memory barrier for the SMP
+ * case to make the pointer visible on another CPU.
+ */
 static struct ghes_edac_pvt *ghes_pvt;
 
+/* GHES registration mutex */
+static DEFINE_MUTEX(ghes_reg_mutex);
+
 /*
  * Sync with other, potentially concurrent callers of
  * ghes_edac_report_mem_error(). We don't know what the
@@ -79,9 +88,8 @@ static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
 		(*num_dimm)++;
 }
 
-static int get_dimm_smbios_index(u16 handle)
+static int get_dimm_smbios_index(struct mem_ctl_info *mci, u16 handle)
 {
-	struct mem_ctl_info *mci = ghes_pvt->mci;
 	int i;
 
 	for (i = 0; i < mci->tot_dimms; i++) {
@@ -198,14 +206,11 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 	enum hw_event_mc_err_type type;
 	struct edac_raw_error_desc *e;
 	struct mem_ctl_info *mci;
-	struct ghes_edac_pvt *pvt = ghes_pvt;
+	struct ghes_edac_pvt *pvt;
 	unsigned long flags;
 	char *p;
 	u8 grain_bits;
 
-	if (!pvt)
-		return;
-
 	/*
 	 * We can do the locking below because GHES defers error processing
 	 * from NMI to IRQ context. Whenever that changes, we'd at least
@@ -216,6 +221,10 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 
 	spin_lock_irqsave(&ghes_lock, flags);
 
+	pvt = ghes_pvt;
+	if (!pvt)
+		goto unlock;
+
 	mci = pvt->mci;
 	e = &mci->error_desc;
 
@@ -348,7 +357,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 			p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
 				     mem_err->mem_dev_handle);
 
-		index = get_dimm_smbios_index(mem_err->mem_dev_handle);
+		index = get_dimm_smbios_index(mci, mem_err->mem_dev_handle);
 		if (index >= 0) {
 			e->top_layer = index;
 			e->enable_per_layer_report = true;
@@ -443,6 +452,8 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 		       grain_bits, e->syndrome, pvt->detail_location);
 
 	edac_raw_mc_handle_error(type, mci, e);
+
+unlock:
 	spin_unlock_irqrestore(&ghes_lock, flags);
 }
 
@@ -457,10 +468,12 @@ static struct acpi_platform_list plat_list[] = {
 int ghes_edac_register(struct ghes *ghes, struct device *dev)
 {
 	bool fake = false;
-	int rc, num_dimm = 0;
+	int rc = 0, num_dimm = 0;
 	struct mem_ctl_info *mci;
+	struct ghes_edac_pvt *pvt;
 	struct edac_mc_layer layers[1];
 	struct ghes_edac_dimm_fill dimm_fill;
+	unsigned long flags;
 	int idx = -1;
 
 	if (IS_ENABLED(CONFIG_X86)) {
@@ -472,11 +485,14 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
 		idx = 0;
 	}
 
+	/* finish another registration/unregistration instance first */
+	mutex_lock(&ghes_reg_mutex);
+
 	/*
 	 * We have only one logical memory controller to which all DIMMs belong.
 	 */
-	if (atomic_inc_return(&ghes_init) > 1)
-		return 0;
+	if (refcount_inc_not_zero(&ghes_refcount))
+		goto unlock;
 
 	/* Get the number of DIMMs */
 	dmi_walk(ghes_edac_count_dimms, &num_dimm);
@@ -494,12 +510,13 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
 	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_edac_pvt));
 	if (!mci) {
 		pr_info("Can't allocate memory for EDAC data\n");
-		return -ENOMEM;
+		rc = -ENOMEM;
+		goto unlock;
 	}
 
-	ghes_pvt	= mci->pvt_info;
-	ghes_pvt->ghes	= ghes;
-	ghes_pvt->mci	= mci;
+	pvt		= mci->pvt_info;
+	pvt->ghes	= ghes;
+	pvt->mci	= mci;
 
 	mci->pdev = dev;
 	mci->mtype_cap = MEM_FLAG_EMPTY;
@@ -541,23 +558,48 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
 	if (rc < 0) {
 		pr_info("Can't register at EDAC core\n");
 		edac_mc_free(mci);
-		return -ENODEV;
+		rc = -ENODEV;
+		goto unlock;
 	}
-	return 0;
+
+	spin_lock_irqsave(&ghes_lock, flags);
+	ghes_pvt = pvt;
+	spin_unlock_irqrestore(&ghes_lock, flags);
+
+	/* only increment on success */
+	refcount_inc(&ghes_refcount);
+
+unlock:
+	mutex_unlock(&ghes_reg_mutex);
+
+	return rc;
 }
 
 void ghes_edac_unregister(struct ghes *ghes)
 {
 	struct mem_ctl_info *mci;
+	unsigned long flags;
 
-	if (!ghes_pvt)
-		return;
+	mutex_lock(&ghes_reg_mutex);
 
-	if (atomic_dec_return(&ghes_init))
-		return;
+	if (!refcount_dec_and_test(&ghes_refcount))
+		goto unlock;
 
-	mci = ghes_pvt->mci;
+	/*
+	 * Wait for the irq handler being finished.
+	 */
+	spin_lock_irqsave(&ghes_lock, flags);
+	mci = ghes_pvt ? ghes_pvt->mci : NULL;
 	ghes_pvt = NULL;
-	edac_mc_del_mc(mci->pdev);
-	edac_mc_free(mci);
+	spin_unlock_irqrestore(&ghes_lock, flags);
+
+	if (!mci)
+		goto unlock;
+
+	mci = edac_mc_del_mc(mci->pdev);
+	if (mci)
+		edac_mc_free(mci);
+
+unlock:
+	mutex_unlock(&ghes_reg_mutex);
 }