使用 dmesg 来查看一些硬件或驱动程序的信息或问题。

Elasticsearch 分片分配失败

Elasticsearch | 作者 yimusidian | 发布于2022年05月07日 | 阅读数:1634

请问一下,我的设备异常断电后,个别索引出现了分片无法分配的问题。我目前的解决方案是通过 accept_data_loss 接受数据丢失,对于这个问题还有其他的解决方案吗?
[root@localhost 0]# curl 'localhost:31600/_cluster/allocation/explain?pretty'
{
"index" : "event20220429_v0",
"shard" : 3,
"primary" : true,
"current_state" : "unassigned",
"unassigned_info" : {
"reason" : "ALLOCATION_FAILED",
"at" : "2022-05-06T09:54:06.907Z",
"failed_allocation_attempts" : 5,
"details" : "failed shard on node [eI0BZkerSe-cZGse4jTudQ]: failed recovery, failure RecoveryFailedException[[event20220429_v0][3]: Recovery failed on {eI0BZke}{eI0BZkerSe-cZGse4jTudQ}{ZxpgodkfR0aLlsTt3vtROA}{127.0.0.1}{127.0.0.1:31701}{xpack.installed=true}]; nested: IndexShardRecoveryException[failed to recover from gateway]; nested: EngineException[failed to recover from translog]; nested: EOFException[read past EOF. pos [44072670] length: [4] end: [44072670]]; ",
"last_allocation_status" : "no"
},
"can_allocate" : "no",
"allocate_explanation" : "cannot allocate because allocation is not permitted to any of the nodes that hold an in-sync shard copy",
"node_allocation_decisions" : [
{
"node_id" : "eI0BZkerSe-cZGse4jTudQ",
"node_name" : "eI0BZke",
"transport_address" : "127.0.0.1:31701",
"node_attributes" : {
"xpack.installed" : "true"
},
"node_decision" : "no",
"store" : {
"in_sync" : true,
"allocation_id" : "XCtc-qLzR3SlhICcWzkRqQ"
},
"deciders" : [
{
"decider" : "max_retry",
"decision" : "NO",
"explanation" : "shard has exceeded the maximum number of retries [5] on failed allocation attempts - manually call [/_cluster/reroute?retry_failed=true] to retry, [unassigned_info[[reason=ALLOCATION_FAILED], at[2022-05-06T09:54:06.907Z], failed_attempts[5], delayed=false, details[failed shard on node [eI0BZkerSe-cZGse4jTudQ]: failed recovery, failure RecoveryFailedException[[event20220429_v0][3]: Recovery failed on {eI0BZke}{eI0BZkerSe-cZGse4jTudQ}{ZxpgodkfR0aLlsTt3vtROA}{127.0.0.1}{127.0.0.1:31701}{xpack.installed=true}]; nested: IndexShardRecoveryException[failed to recover from gateway]; nested: EngineException[failed to recover from translog]; nested: EOFException[read past EOF. pos [44072670] length: [4] end: [44072670]]; ], allocation_status[deciders_no]]]"
}
]
},
{
"node_id" : "wIgVy_2ZTVuDezTabRGDeQ",
"node_name" : "wIgVy_2",
"transport_address" : "127.0.0.1:31700",
"node_attributes" : {
"xpack.installed" : "true"
},
"node_decision" : "no",
"store" : {
"in_sync" : true,
"allocation_id" : "YZJd56cMQqC3ZSXktoJx-Q"
},
"deciders" : [
{
"decider" : "max_retry",
"decision" : "NO",
"explanation" : "shard has exceeded the maximum number of retries [5] on failed allocation attempts - manually call [/_cluster/reroute?retry_failed=true] to retry, [unassigned_info[[reason=ALLOCATION_FAILED], at[2022-05-06T09:54:06.907Z], failed_attempts[5], delayed=false, details[failed shard on node [eI0BZkerSe-cZGse4jTudQ]: failed recovery, failure RecoveryFailedException[[event20220429_v0][3]: Recovery failed on {eI0BZke}{eI0BZkerSe-cZGse4jTudQ}{ZxpgodkfR0aLlsTt3vtROA}{127.0.0.1}{127.0.0.1:31701}{xpack.installed=true}]; nested: IndexShardRecoveryException[failed to recover from gateway]; nested: EngineException[failed to recover from translog]; nested: EOFException[read past EOF. pos [44072670] length: [4] end: [44072670]]; ], allocation_status[deciders_no]]]"
}
]
}
]
}
[root@localhost 0]#
已邀请:

Charele - Cisco4321

赞同来自:

translog(事务日志)文件损坏,没有什么完美的解决方案。
 
使用 accept_data_loss 会丢失最后一次 flush 之后的数据,除此之外没什么大碍。

要回复问题请先登录注册