
Exception when writing data to Elasticsearch from Spark

Elasticsearch | Author: Gitai | Posted 2017-06-13 | Views: 6781

scala 2.11.11
spark 2.1.1
elasticsearch 5.4.1
elasticsearch-spark-20-2.11-5.4.1
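
The EsTest.scala source isn't included in the post, but the "Writing to [spark/docs]" line in the log below matches the standard es-hadoop RDD write. A minimal sketch of what such a job typically looks like (index name taken from the log; the master, host and other settings are assumptions, not the actual code):

import org.apache.spark.{SparkConf, SparkContext}
import org.elasticsearch.spark._                 // brings saveToEs into scope for RDDs

object EsTest {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("EsTest")
      .setMaster("local[*]")                     // assumption: local mode ("localhost, executor driver" in the log)
      .set("es.nodes", "127.0.0.1")              // assumption: where Elasticsearch is expected to be reachable
      .set("es.port", "9200")
      .set("es.index.auto.create", "true")
    val sc = new SparkContext(conf)

    // two trivial documents written to the index/type seen in the log
    val doc1 = Map("one" -> 1, "two" -> 2)
    val doc2 = Map("three" -> 3)
    sc.makeRDD(Seq(doc1, doc2)).saveToEs("spark/docs")

    sc.stop()
  }
}

Log output from the failing run: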
 

 
17/06/13 12:31:23 INFO util.Version: Elasticsearch Hadoop v5.4.1 [2425ec2a56]
17/06/13 12:31:24 INFO rdd.EsRDDWriter: Writing to [spark/docs]
17/06/13 12:33:32 INFO httpclient.HttpMethodDirector: I/O exception (java.net.ConnectException) caught when processing request: Connection timed out (Connection timed out)
17/06/13 12:33:32 INFO httpclient.HttpMethodDirector: Retrying request
17/06/13 12:33:32 INFO httpclient.HttpMethodDirector: I/O exception (java.net.ConnectException) caught when processing request: Connection timed out (Connection timed out)
17/06/13 12:33:32 INFO httpclient.HttpMethodDirector: Retrying request
17/06/13 12:33:32 INFO httpclient.HttpMethodDirector: I/O exception (java.net.ConnectException) caught when processing request: Connection timed out (Connection timed out)
17/06/13 12:33:32 INFO httpclient.HttpMethodDirector: Retrying request
17/06/13 12:33:32 INFO httpclient.HttpMethodDirector: I/O exception (java.net.ConnectException) caught when processing request: Connection timed out (Connection timed out)
17/06/13 12:39:53 INFO rdd.EsRDDWriter: Writing to [spark/docs]
17/06/13 12:39:54 ERROR executor.Executor: Exception in task 0.0 in stage 0.0 (TID 0)
org.elasticsearch.hadoop.rest.EsHadoopNoNodesLeftException: Connection error (check network and/or proxy settings)- all nodes failed; tried [[172.17.0.2:9200]]
at org.elasticsearch.hadoop.rest.NetworkClient.execute(NetworkClient.java:150)
at org.elasticsearch.hadoop.rest.RestClient.execute(RestClient.java:461)
at org.elasticsearch.hadoop.rest.RestClient.executeNotFoundAllowed(RestClient.java:469)
at org.elasticsearch.hadoop.rest.RestClient.exists(RestClient.java:547)
at org.elasticsearch.hadoop.rest.RestClient.touch(RestClient.java:553)
at org.elasticsearch.hadoop.rest.RestRepository.touch(RestRepository.java:412)
at org.elasticsearch.hadoop.rest.RestService.initSingleIndex(RestService.java:607)
at org.elasticsearch.hadoop.rest.RestService.createWriter(RestService.java:595)
at org.elasticsearch.spark.rdd.EsRDDWriter.write(EsRDDWriter.scala:58)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
17/06/13 12:39:54 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): org.elasticsearch.hadoop.rest.EsHadoopNoNodesLeftException: Connection error (check network and/or proxy settings)- all nodes failed; tried [[172.17.0.2:9200]]
at org.elasticsearch.hadoop.rest.NetworkClient.execute(NetworkClient.java:150)
at org.elasticsearch.hadoop.rest.RestClient.execute(RestClient.java:461)
at org.elasticsearch.hadoop.rest.RestClient.executeNotFoundAllowed(RestClient.java:469)
at org.elasticsearch.hadoop.rest.RestClient.exists(RestClient.java:547)
at org.elasticsearch.hadoop.rest.RestClient.touch(RestClient.java:553)
at org.elasticsearch.hadoop.rest.RestRepository.touch(RestRepository.java:412)
at org.elasticsearch.hadoop.rest.RestService.initSingleIndex(RestService.java:607)
at org.elasticsearch.hadoop.rest.RestService.createWriter(RestService.java:595)
at org.elasticsearch.spark.rdd.EsRDDWriter.write(EsRDDWriter.scala:58)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)

17/06/13 12:39:54 ERROR scheduler.TaskSetManager: Task 0 in stage 0.0 failed 1 times; aborting job
17/06/13 12:39:54 INFO scheduler.TaskSchedulerImpl: Cancelling stage 0
17/06/13 12:39:54 INFO executor.Executor: Executor is trying to kill task 1.0 in stage 0.0 (TID 1)
17/06/13 12:39:54 INFO executor.Executor: Executor is trying to kill task 2.0 in stage 0.0 (TID 2)
17/06/13 12:39:54 INFO executor.Executor: Executor is trying to kill task 3.0 in stage 0.0 (TID 3)
17/06/13 12:39:54 INFO scheduler.TaskSchedulerImpl: Stage 0 was cancelled
17/06/13 12:39:54 INFO scheduler.DAGScheduler: ResultStage 0 (runJob at EsSpark.scala:102) failed in 518.360 s due to Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): org.elasticsearch.hadoop.rest.EsHadoopNoNodesLeftException: Connection error (check network and/or proxy settings)- all nodes failed; tried [[172.17.0.2:9200]]
at org.elasticsearch.hadoop.rest.NetworkClient.execute(NetworkClient.java:150)
at org.elasticsearch.hadoop.rest.RestClient.execute(RestClient.java:461)
at org.elasticsearch.hadoop.rest.RestClient.executeNotFoundAllowed(RestClient.java:469)
at org.elasticsearch.hadoop.rest.RestClient.exists(RestClient.java:547)
at org.elasticsearch.hadoop.rest.RestClient.touch(RestClient.java:553)
at org.elasticsearch.hadoop.rest.RestRepository.touch(RestRepository.java:412)
at org.elasticsearch.hadoop.rest.RestService.initSingleIndex(RestService.java:607)
at org.elasticsearch.hadoop.rest.RestService.createWriter(RestService.java:595)
at org.elasticsearch.spark.rdd.EsRDDWriter.write(EsRDDWriter.scala:58)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)

Driver stacktrace:
17/06/13 12:39:54 INFO scheduler.DAGScheduler: Job 0 failed: runJob at EsSpark.scala:102, took 519.832656 s
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): org.elasticsearch.hadoop.rest.EsHadoopNoNodesLeftException: Connection error (check network and/or proxy settings)- all nodes failed; tried [[172.17.0.2:9200]]
at org.elasticsearch.hadoop.rest.NetworkClient.execute(NetworkClient.java:150)
at org.elasticsearch.hadoop.rest.RestClient.execute(RestClient.java:461)
at org.elasticsearch.hadoop.rest.RestClient.executeNotFoundAllowed(RestClient.java:469)
at org.elasticsearch.hadoop.rest.RestClient.exists(RestClient.java:547)
at org.elasticsearch.hadoop.rest.RestClient.touch(RestClient.java:553)
at org.elasticsearch.hadoop.rest.RestRepository.touch(RestRepository.java:412)
at org.elasticsearch.hadoop.rest.RestService.initSingleIndex(RestService.java:607)
at org.elasticsearch.hadoop.rest.RestService.createWriter(RestService.java:595)
at org.elasticsearch.spark.rdd.EsRDDWriter.write(EsRDDWriter.scala:58)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)

Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1918)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1931)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1951)
at org.elasticsearch.spark.rdd.EsSpark$.doSaveToEs(EsSpark.scala:102)
at org.elasticsearch.spark.rdd.EsSpark$.saveToEs(EsSpark.scala:76)
at org.elasticsearch.spark.rdd.EsSpark$.saveToEs(EsSpark.scala:71)
at org.elasticsearch.spark.package$SparkRDDFunctions.saveToEs(package.scala:55)
at EsTest$.main(EsTest.scala:35)
at EsTest.main(EsTest.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:738)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: org.elasticsearch.hadoop.rest.EsHadoopNoNodesLeftException: Connection error (check network and/or proxy settings)- all nodes failed; tried [[172.17.0.2:9200]]
at org.elasticsearch.hadoop.rest.NetworkClient.execute(NetworkClient.java:150)
at org.elasticsearch.hadoop.rest.RestClient.execute(RestClient.java:461)
at org.elasticsearch.hadoop.rest.RestClient.executeNotFoundAllowed(RestClient.java:469)
at org.elasticsearch.hadoop.rest.RestClient.exists(RestClient.java:547)
at org.elasticsearch.hadoop.rest.RestClient.touch(RestClient.java:553)
at org.elasticsearch.hadoop.rest.RestRepository.touch(RestRepository.java:412)
at org.elasticsearch.hadoop.rest.RestService.initSingleIndex(RestService.java:607)
at org.elasticsearch.hadoop.rest.RestService.createWriter(RestService.java:595)
at org.elasticsearch.spark.rdd.EsRDDWriter.write(EsRDDWriter.scala:58)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)

wilbur


Does the exception occur as soon as the job starts, or only after it has been running for a while? If it's the latter, this is usually a timeout caused by the Elasticsearch cluster being under heavy load.
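
If it really is load-related (timeouts while Elasticsearch is busy indexing), the es-hadoop connector exposes batch and retry settings that can be relaxed. A sketch with illustrative values, assuming a SparkConf like the one in the write sketch above; note that in the trace the failure happens while the connector checks the target index (RestClient.touch), i.e. before any bulk write, so plain connectivity is worth ruling out first:

conf.set("es.batch.size.entries", "500")        // send smaller bulk requests
    .set("es.batch.write.retry.count", "6")     // retry rejected bulk batches more times
    .set("es.batch.write.retry.wait", "30s")    // and wait longer between retries
    .set("es.http.timeout", "3m")               // tolerate slower HTTP responses before timing out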

lym19833


Has this been solved? I'm running into the same problem.

yangruideyang


It's probably a network issue.
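
The address the connector tried, 172.17.0.2:9200, looks like a Docker bridge IP, which fits the network explanation: if Elasticsearch runs inside a container (an assumption, but that subnet is Docker's default bridge network), node discovery hands the connector an internal address that the Spark driver and executors cannot reach. The usual workaround in that setup is to point es.nodes at an address Spark can actually reach and stop the connector from switching to discovered node addresses:

conf.set("es.nodes", "your.reachable.host")     // hypothetical placeholder: the host/port published to Spark's network
    .set("es.port", "9200")
    .set("es.nodes.wan.only", "true")           // talk only to es.nodes; skip node discovery

A quick sanity check is to run curl http://<host>:9200 from the machine running the Spark driver and executors; if that already times out, the problem is in the network path rather than in Spark or es-hadoop.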
