diff options
author | Michael Ellerman | 2018-06-13 23:24:14 +1000 |
---|---|---|
committer | Michael Ellerman | 2018-07-12 21:08:10 +1000 |
commit | 54dbcfc211f15586c57d27492f938eb4df964257 (patch) | |
tree | e96c32f125e4e30e4644aa432b6f15c55806244f /arch/powerpc/kernel | |
parent | e11b64b1ef336f8976e5bf194b0eede48954f419 (diff) |
powerpc/64s: Report SLB multi-hit rather than parity error
When we take an SLB multi-hit on bare metal, we see both the multi-hit
and parity error bits set in DSISR. The user manuals indicate this is
expected to always happen on Power8, whereas for Power9 they say a
multi-hit will "usually" also cause a parity error.
We decide what to do based on the various error tables in mce_power.c,
and because we process them in order and only report the first, we
currently always report a parity error but not the multi-hit, e.g.:
Severe Machine check interrupt [Recovered]
Initiator: CPU
Error type: SLB [Parity]
Effective address: c000000ffffd4300
Although this is correct, it leaves the user wondering why they got a
parity error. It would be clearer instead if we reported the
multi-hit because that is more likely to be simply a software bug,
whereas a true parity error is possibly an indication of a bad core.
We can do that simply by reordering the error tables so that multi-hit
appears before parity. That doesn't affect the error recovery at all,
because we flush the SLB either way.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r-- | arch/powerpc/kernel/mce_power.c | 18 |
1 files changed, 9 insertions, 9 deletions
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 38c5b4764bfe..d6756af6ec78 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -257,12 +257,12 @@ static const struct mce_derror_table mce_p7_derror_table[] = { { 0x00000400, true, MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, { 0x00000100, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0x00000080, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, { 0x00000040, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, @@ -290,12 +290,12 @@ static const struct mce_derror_table mce_p8_derror_table[] = { { 0x00000200, true, MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, { 0x00000100, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0x00000080, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, { 0, false, 0, 0, 0, 0 } }; static const struct mce_derror_table mce_p9_derror_table[] = { @@ -320,12 +320,12 @@ static const struct mce_derror_table mce_p9_derror_table[] = { { 0x00000200, false, MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, { 0x00000100, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0x00000080, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - 
MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, { 0x00000040, true, MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, |