Well,不要刷屏了

ES异常退出,有没有好的排查思路呢?

Elasticsearch | 作者 wangxinrong | 发布于2020年04月28日 | 阅读数:2337

出现问题的时候并没有发现有异常的写入或查询请求量,以及特别的查询语句。唯一当时在做的操作是有脚本在删旧索引,但这个也是一直有定期在做的。

日志内容如下,不知道是不是网络或者是ES本身运行过程中出错导致
[2020-04-28T13:39:19,323][ERROR][o.e.t.n.Netty4Utils ] fatal error on the network layer
at org.elasticsearch.transport.netty4.Netty4Utils.maybeDie(Netty4Utils.java:185)
at org.elasticsearch.transport.netty4.Netty4MessageChannelHandler.exceptionCaught(Netty4MessageChannelHandler.java:83)
at io.netty.channel.AbstractChannelHandlerContext.invokeExceptionCaught(AbstractChannelHandlerContext.java:285)
at io.netty.channel.AbstractChannelHandlerContext.notifyHandlerException(AbstractChannelHandlerContext.java:850)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:364)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:310)
at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:297)
at io.netty.handler.codec.ByteToMessageDecoder.callDecode(ByteToMessageDecoder.java:413)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:265)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
at io.netty.handler.logging.LoggingHandler.channelRead(LoggingHandler.java:241)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)
at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1334)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:926)
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:134)
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:644)
at io.netty.channel.nio.NioEventLoop.processSelectedKeysPlain(NioEventLoop.java:544)
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:498)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:458)
at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)
at java.lang.Thread.run(Thread.java:745)
[2020-04-28T13:39:19,332][WARN ][o.e.x.s.t.n.SecurityNetty4Transport] [XXXXXXX] exception caught on transport layer [[id: 0x7a2e548c, L:/x.x.x.x:39342 - R:x.x.x.x/x.x.x.x:9300]], closing connection
org.elasticsearch.ElasticsearchException: java.lang.StackOverflowError
at org.elasticsearch.transport.netty4.Netty4Transport.exceptionCaught(Netty4Transport.java:323) [transport-netty4-5.6.3.jar:5.6.3]
at org.elasticsearch.transport.netty4.Netty4MessageChannelHandler.exceptionCaught(Netty4MessageChannelHandler.java:84) [transport-netty4-5.6.3.jar:5.6.3]
at io.netty.channel.AbstractChannelHandlerContext.invokeExceptionCaught(AbstractChannelHandlerContext.java:285) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.AbstractChannelHandlerContext.notifyHandlerException(AbstractChannelHandlerContext.java:850) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:364) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:310) [netty-codec-4.1.13.Final.jar:4.1.13.Final]
at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:297) [netty-codec-4.1.13.Final.jar:4.1.13.Final]
at io.netty.handler.codec.ByteToMessageDecoder.callDecode(ByteToMessageDecoder.java:413) [netty-codec-4.1.13.Final.jar:4.1.13.Final]
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:265) [netty-codec-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.handler.logging.LoggingHandler.channelRead(LoggingHandler.java:241) [netty-handler-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1334) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:926) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:134) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:644) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.nio.NioEventLoop.processSelectedKeysPlain(NioEventLoop.java:544) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:498) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:458) [netty-transport-4.1.13.Final.jar:4.1.13.Final]
at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858) [netty-common-4.1.13.Final.jar:4.1.13.Final]
at java.lang.Thread.run(Thread.java:745) [?:1.8.0_66]
Caused by: java.lang.StackOverflowError
at org.elasticsearch.action.search.InitialSearchPhase.skipShard(InitialSearchPhase.java:322) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.AbstractSearchAsyncAction.skipShard(AbstractSearchAsyncAction.java:321) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.InitialSearchPhase.maybeExecuteNext(InitialSearchPhase.java:147) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.InitialSearchPhase.successfulShardExecution(InitialSearchPhase.java:207) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.InitialSearchPhase.skipShard(InitialSearchPhase.java:323) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.AbstractSearchAsyncAction.skipShard(AbstractSearchAsyncAction.java:321) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.InitialSearchPhase.maybeExecuteNext(InitialSearchPhase.java:147) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.InitialSearchPhase.successfulShardExecution(InitialSearchPhase.java:207) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.InitialSearchPhase.skipShard(InitialSearchPhase.java:323) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.AbstractSearchAsyncAction.skipShard(AbstractSearchAsyncAction.java:321) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.InitialSearchPhase.maybeExecuteNext(InitialSearchPhase.java:147) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.InitialSearchPhase.successfulShardExecution(InitialSearchPhase.java:207) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.InitialSearchPhase.skipShard(InitialSearchPhase.java:323) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.AbstractSearchAsyncAction.skipShard(AbstractSearchAsyncAction.java:321) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.InitialSearchPhase.maybeExecuteNext(InitialSearchPhase.java:147) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.InitialSearchPhase.successfulShardExecution(InitialSearchPhase.java:207) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.InitialSearchPhase.skipShard(InitialSearchPhase.java:323) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.AbstractSearchAsyncAction.skipShard(AbstractSearchAsyncAction.java:321) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.InitialSearchPhase.maybeExecuteNext(InitialSearchPhase.java:147) ~[elasticsearch-5.6.3.jar:5.6.3]
at org.elasticsearch.action.search.InitialSearchPhase.successfulShardExecution(InitialSearchPhase.java:207) ~[elasticsearch-5.6.3.jar:5.6.3]

这个有什么好的查原因的思路吗
已邀请:

locatelli

赞同来自: byx313

5.6.3的话有已知的StackOverflow的bug,主要是发生在search phase并且shard数比较多的情况。
最好升级版本。如果不升级的话,减少“size”可以降低发生的可能性。
 

要回复问题请先登录注册