You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS changes from Ingo Molnar:
"[ The reason for drivers/ updates is that Boris asked for the
drivers/edac/ changes to go via x86/ras in this cycle ]
Main changes:
- AMD CPUs:
. Add ECC event decoding support for new F15h models
. Various erratum fixes
. Fix single-channel on dual-channel-controllers bug.
- Intel CPUs:
. UC uncorrectable memory error parsing fix
. Add support for CMC (Corrected Machine Check) 'FF' (Firmware
First) flag in the APEI HEST
- Various cleanups and fixes"
* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
amd64_edac: Fix incorrect wraparounds
amd64_edac: Correct erratum 505 range
cpc925_edac: Use proper array termination
x86/mce, acpi/apei: Only disable banks listed in HEST if mce is configured
amd64_edac: Get rid of boot_cpu_data accesses
amd64_edac: Add ECC decoding support for newer F15h models
x86, amd_nb: Clarify F15h, model 30h GART and L3 support
pci_ids: Add PCI device ID functions 3 and 4 for newer F15h models.
x38_edac: Make a local function static
i3200_edac: Make a local function static
x86/mce: Pay no attention to 'F' bit in MCACOD when parsing 'UC' errors
APEI/ERST: Fix error message formatting
amd64_edac: Fix single-channel setups
EDAC: Replace strict_strtol() with kstrtol()
mce: acpi/apei: Soft-offline a page on firmware GHES notification
mce: acpi/apei: Add a boot option to disable ff mode for corrected errors
mce: acpi/apei: Honour Firmware First for MCA banks listed in APEI HEST CMC
This commit is contained in:
+23
-28
@@ -39,7 +39,8 @@
|
||||
|
||||
#include "apei-internal.h"
|
||||
|
||||
#define ERST_PFX "ERST: "
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "ERST: " fmt
|
||||
|
||||
/* ERST command status */
|
||||
#define ERST_STATUS_SUCCESS 0x0
|
||||
@@ -109,8 +110,7 @@ static inline int erst_errno(int command_status)
|
||||
static int erst_timedout(u64 *t, u64 spin_unit)
|
||||
{
|
||||
if ((s64)*t < spin_unit) {
|
||||
pr_warning(FW_WARN ERST_PFX
|
||||
"Firmware does not respond in time\n");
|
||||
pr_warn(FW_WARN "Firmware does not respond in time.\n");
|
||||
return 1;
|
||||
}
|
||||
*t -= spin_unit;
|
||||
@@ -186,8 +186,8 @@ static int erst_exec_stall(struct apei_exec_context *ctx,
|
||||
|
||||
if (ctx->value > FIRMWARE_MAX_STALL) {
|
||||
if (!in_nmi())
|
||||
pr_warning(FW_WARN ERST_PFX
|
||||
"Too long stall time for stall instruction: %llx.\n",
|
||||
pr_warn(FW_WARN
|
||||
"Too long stall time for stall instruction: 0x%llx.\n",
|
||||
ctx->value);
|
||||
stall_time = FIRMWARE_MAX_STALL;
|
||||
} else
|
||||
@@ -206,8 +206,8 @@ static int erst_exec_stall_while_true(struct apei_exec_context *ctx,
|
||||
|
||||
if (ctx->var1 > FIRMWARE_MAX_STALL) {
|
||||
if (!in_nmi())
|
||||
pr_warning(FW_WARN ERST_PFX
|
||||
"Too long stall time for stall while true instruction: %llx.\n",
|
||||
pr_warn(FW_WARN
|
||||
"Too long stall time for stall while true instruction: 0x%llx.\n",
|
||||
ctx->var1);
|
||||
stall_time = FIRMWARE_MAX_STALL;
|
||||
} else
|
||||
@@ -271,8 +271,7 @@ static int erst_exec_move_data(struct apei_exec_context *ctx,
|
||||
|
||||
/* ioremap does not work in interrupt context */
|
||||
if (in_interrupt()) {
|
||||
pr_warning(ERST_PFX
|
||||
"MOVE_DATA can not be used in interrupt context");
|
||||
pr_warn("MOVE_DATA can not be used in interrupt context.\n");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
@@ -524,8 +523,7 @@ retry:
|
||||
ERST_RECORD_ID_CACHE_SIZE_MAX);
|
||||
if (new_size <= erst_record_id_cache.size) {
|
||||
if (printk_ratelimit())
|
||||
pr_warning(FW_WARN ERST_PFX
|
||||
"too many record ID!\n");
|
||||
pr_warn(FW_WARN "too many record IDs!\n");
|
||||
return 0;
|
||||
}
|
||||
alloc_size = new_size * sizeof(entries[0]);
|
||||
@@ -761,8 +759,7 @@ static int __erst_clear_from_storage(u64 record_id)
|
||||
static void pr_unimpl_nvram(void)
|
||||
{
|
||||
if (printk_ratelimit())
|
||||
pr_warning(ERST_PFX
|
||||
"NVRAM ERST Log Address Range is not implemented yet\n");
|
||||
pr_warn("NVRAM ERST Log Address Range not implemented yet.\n");
|
||||
}
|
||||
|
||||
static int __erst_write_to_nvram(const struct cper_record_header *record)
|
||||
@@ -1133,7 +1130,7 @@ static int __init erst_init(void)
|
||||
goto err;
|
||||
|
||||
if (erst_disable) {
|
||||
pr_info(ERST_PFX
|
||||
pr_info(
|
||||
"Error Record Serialization Table (ERST) support is disabled.\n");
|
||||
goto err;
|
||||
}
|
||||
@@ -1144,14 +1141,14 @@ static int __init erst_init(void)
|
||||
goto err;
|
||||
else if (ACPI_FAILURE(status)) {
|
||||
const char *msg = acpi_format_exception(status);
|
||||
pr_err(ERST_PFX "Failed to get table, %s\n", msg);
|
||||
pr_err("Failed to get table, %s\n", msg);
|
||||
rc = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
rc = erst_check_table(erst_tab);
|
||||
if (rc) {
|
||||
pr_err(FW_BUG ERST_PFX "ERST table is invalid\n");
|
||||
pr_err(FW_BUG "ERST table is invalid.\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
@@ -1169,21 +1166,19 @@ static int __init erst_init(void)
|
||||
rc = erst_get_erange(&erst_erange);
|
||||
if (rc) {
|
||||
if (rc == -ENODEV)
|
||||
pr_info(ERST_PFX
|
||||
pr_info(
|
||||
"The corresponding hardware device or firmware implementation "
|
||||
"is not available.\n");
|
||||
else
|
||||
pr_err(ERST_PFX
|
||||
"Failed to get Error Log Address Range.\n");
|
||||
pr_err("Failed to get Error Log Address Range.\n");
|
||||
goto err_unmap_reg;
|
||||
}
|
||||
|
||||
r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST");
|
||||
if (!r) {
|
||||
pr_err(ERST_PFX
|
||||
"Can not request iomem region <0x%16llx-0x%16llx> for ERST.\n",
|
||||
(unsigned long long)erst_erange.base,
|
||||
(unsigned long long)erst_erange.base + erst_erange.size);
|
||||
pr_err("Can not request [mem %#010llx-%#010llx] for ERST.\n",
|
||||
(unsigned long long)erst_erange.base,
|
||||
(unsigned long long)erst_erange.base + erst_erange.size - 1);
|
||||
rc = -EIO;
|
||||
goto err_unmap_reg;
|
||||
}
|
||||
@@ -1193,7 +1188,7 @@ static int __init erst_init(void)
|
||||
if (!erst_erange.vaddr)
|
||||
goto err_release_erange;
|
||||
|
||||
pr_info(ERST_PFX
|
||||
pr_info(
|
||||
"Error Record Serialization Table (ERST) support is initialized.\n");
|
||||
|
||||
buf = kmalloc(erst_erange.size, GFP_KERNEL);
|
||||
@@ -1205,15 +1200,15 @@ static int __init erst_init(void)
|
||||
rc = pstore_register(&erst_info);
|
||||
if (rc) {
|
||||
if (rc != -EPERM)
|
||||
pr_info(ERST_PFX
|
||||
"Could not register with persistent store\n");
|
||||
pr_info(
|
||||
"Could not register with persistent store.\n");
|
||||
erst_info.buf = NULL;
|
||||
erst_info.bufsize = 0;
|
||||
kfree(buf);
|
||||
}
|
||||
} else
|
||||
pr_err(ERST_PFX
|
||||
"Failed to allocate %lld bytes for persistent store error log\n",
|
||||
pr_err(
|
||||
"Failed to allocate %lld bytes for persistent store error log.\n",
|
||||
erst_erange.size);
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -409,6 +409,34 @@ static void ghes_clear_estatus(struct ghes *ghes)
|
||||
ghes->flags &= ~GHES_TO_CLEAR;
|
||||
}
|
||||
|
||||
/*
 * Hand the page named by a memory error section over to the memory-failure
 * machinery.
 *
 * @gdata: generic error data entry; the memory error section is read from
 *         the bytes immediately following this header.
 * @sev:   severity of the enclosing error status block, as a GHES_SEV_* value.
 *
 * Two cases are acted upon, and both require the section to flag its
 * physical address as valid:
 *  - a corrected error whose section carries the "threshold exceeded" flag:
 *    the page is queued with MF_SOFT_OFFLINE if its PFN is valid, otherwise
 *    a rate-limited firmware warning is printed;
 *  - an error that is recoverable at both the status-block and the section
 *    level: the page is queued with no flags.
 *
 * Compiles to an empty function without CONFIG_ACPI_APEI_MEMORY_FAILURE.
 */
static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
	struct cper_sec_mem_err *mem_err = (struct cper_sec_mem_err *)(gdata + 1);
	int sec_sev = ghes_severity(gdata->error_severity);
	unsigned long pfn;

	/* Neither case below applies without a valid physical address. */
	if (!(mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS))
		return;

	pfn = mem_err->physical_addr >> PAGE_SHIFT;

	if (sec_sev == GHES_SEV_CORRECTED &&
	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED)) {
		if (pfn_valid(pfn)) {
			memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE);
		} else if (printk_ratelimit()) {
			pr_warn(FW_WARN GHES_PFX
				"Invalid address in generic error data: %#llx\n",
				mem_err->physical_addr);
		}
	}

	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
		memory_failure_queue(pfn, 0, 0);
#endif
}
|
||||
|
||||
static void ghes_do_proc(struct ghes *ghes,
|
||||
const struct acpi_hest_generic_status *estatus)
|
||||
{
|
||||
@@ -428,15 +456,7 @@ static void ghes_do_proc(struct ghes *ghes,
|
||||
apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
|
||||
mem_err);
|
||||
#endif
|
||||
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
|
||||
if (sev == GHES_SEV_RECOVERABLE &&
|
||||
sec_sev == GHES_SEV_RECOVERABLE &&
|
||||
mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
|
||||
unsigned long pfn;
|
||||
pfn = mem_err->physical_addr >> PAGE_SHIFT;
|
||||
memory_failure_queue(pfn, 0, 0);
|
||||
}
|
||||
#endif
|
||||
ghes_handle_memory_failure(gdata, sev);
|
||||
}
|
||||
#ifdef CONFIG_ACPI_APEI_PCIEAER
|
||||
else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include <linux/io.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <acpi/apei.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#include "apei-internal.h"
|
||||
|
||||
@@ -121,6 +122,41 @@ int apei_hest_parse(apei_hest_func_t func, void *data)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(apei_hest_parse);
|
||||
|
||||
/*
|
||||
* Check if firmware advertises firmware first mode. We need FF bit to be set
|
||||
* along with a set of MC banks which work in FF mode.
|
||||
*/
|
||||
static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data)
|
||||
{
|
||||
#ifdef CONFIG_X86_MCE
|
||||
int i;
|
||||
struct acpi_hest_ia_corrected *cmc;
|
||||
struct acpi_hest_ia_error_bank *mc_bank;
|
||||
|
||||
if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
|
||||
return 0;
|
||||
|
||||
cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
|
||||
if (!cmc->enabled)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* We expect HEST to provide a list of MC banks that report errors
|
||||
* in firmware first mode. Otherwise, return non-zero value to
|
||||
* indicate that we are done parsing HEST.
|
||||
*/
|
||||
if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks)
|
||||
return 1;
|
||||
|
||||
pr_info(HEST_PFX "Enabling Firmware First mode for corrected errors.\n");
|
||||
|
||||
mc_bank = (struct acpi_hest_ia_error_bank *)(cmc + 1);
|
||||
for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++)
|
||||
mce_disable_bank(mc_bank->bank_number);
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct ghes_arr {
|
||||
struct platform_device **ghes_devs;
|
||||
unsigned int count;
|
||||
@@ -227,6 +263,9 @@ void __init acpi_hest_init(void)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!acpi_disable_cmcff)
|
||||
apei_hest_parse(hest_parse_cmc, NULL);
|
||||
|
||||
if (!ghes_disable) {
|
||||
rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
|
||||
if (rc)
|
||||
|
||||
Reference in New Issue
Block a user