Not a member of Pastebin yet?
Sign Up —
it unlocks many cool features!
- ERROR: [pid 57194] Worker Worker(salt=783345934, workers=1, host=dev-03.jiwiredev.com, username=lblokhin, pid=48037, sudo_user=root) failed LciExtraction(campaign=21275, stat_date=2016-11-06, output_base_path=maprfs://mapr5/opt/lci/lblokhin, job_name=foo52, tvread=, saveastext=, tvwrite=)
- Traceback (most recent call last):
- File "/usr/local/lib/python2.7/dist-packages/luigi/worker.py", line 191, in run
- new_deps = self._run_get_new_deps()
- File "/usr/local/lib/python2.7/dist-packages/luigi/worker.py", line 129, in _run_get_new_deps
- task_gen = self.task.run()
- File "/code/ndlci/luigi/tasks/spark.py", line 491, in run
- spark_run_result = self.spark_run()
- File "/code/ndlci/luigi/lci/extraction.py", line 529, in spark_run
- results, expander_names = self._do_spark_run()
- File "/code/ndlci/luigi/lci/extraction.py", line 1214, in _do_spark_run
- users_count = retry_cldb(Users.count)
- File "/code/ndlci/luigi/targets/hdfs.py", line 30, in retry_cldb
- return f(*args, **kwargs)
- File "/opt/spark/current/python/pyspark/rdd.py", line 1008, in count
- return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum()
- File "/opt/spark/current/python/pyspark/rdd.py", line 999, in sum
- return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add)
- File "/opt/spark/current/python/pyspark/rdd.py", line 873, in fold
- vals = self.mapPartitions(func).collect()
- File "/opt/spark/current/python/pyspark/rdd.py", line 776, in collect
- port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
- File "/opt/spark/current/python/lib/py4j-0.10.3-src.zip/py4j/java_gateway.py", line 1133, in __call__
- answer, self.gateway_client, self.target_id, self.name)
- File "/opt/spark/current/python/pyspark/sql/utils.py", line 63, in deco
- return f(*a, **kw)
- File "/opt/spark/current/python/lib/py4j-0.10.3-src.zip/py4j/protocol.py", line 319, in get_return_value
- format(target_id, ".", name), value)
- Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
- : org.apache.spark.SparkException: Job aborted due to stage failure: Task 23 in stage 10.0 failed 5 times, most recent failure: Lost task 23.4 in stage 10.0 (TID 6328, mapr5-209.jiwiredev.com): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
- File "/opt/spark/current/python/lib/pyspark.zip/pyspark/worker.py", line 159, in main
- func, profiler, deserializer, serializer = read_udfs(pickleSer, infile)
- File "/opt/spark/current/python/lib/pyspark.zip/pyspark/worker.py", line 97, in read_udfs
- arg_offsets, udf = read_single_udf(pickleSer, infile)
- File "/opt/spark/current/python/lib/pyspark.zip/pyspark/worker.py", line 78, in read_single_udf
- f, return_type = read_command(pickleSer, infile)
- File "/opt/spark/current/python/lib/pyspark.zip/pyspark/worker.py", line 54, in read_command
- command = serializer._read_with_length(file)
- File "/opt/spark/current/python/lib/pyspark.zip/pyspark/serializers.py", line 164, in _read_with_length
- return self.loads(obj)
- File "/opt/spark/current/python/lib/pyspark.zip/pyspark/serializers.py", line 422, in loads
- return pickle.loads(obj)
- File "build/bdist.linux-x86_64/egg/ndlci/luigi/lci/__init__.py", line 10, in <module>
- File "build/bdist.linux-x86_64/egg/ndlci/luigi/lci/pca.py", line 9, in <module>
- ImportError: No module named jiwire_reports.engines.mysql_db
- at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRDD.scala:193)
- at org.apache.spark.api.python.PythonRunner$$anon$1.<init>(PythonRDD.scala:234)
- at org.apache.spark.api.python.PythonRunner.compute(PythonRDD.scala:152)
- at org.apache.spark.sql.execution.python.BatchEvalPythonExec$$anonfun$doExecute$1.apply(BatchEvalPythonExec.scala:124)
- at org.apache.spark.sql.execution.python.BatchEvalPythonExec$$anonfun$doExecute$1.apply(BatchEvalPythonExec.scala:68)
- at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:785)
- at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:785)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:63)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.api.python.PairwiseRDD.compute(PythonRDD.scala:390)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:79)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:47)
- at org.apache.spark.scheduler.Task.run(Task.scala:86)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
Add Comment
Please Sign In to add a comment