Tag Archives: # Python

[Solved] Jupyter Notebook Error: SparkException: Python worker failed to connect back

Error reported:

---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-24-bafca16b0526> in <module>
      8     return jobitem, ratingsRDD
      9 jobitem, jobRDD = preparJobdata(sc)
---> 10 jobRDD.collect() 

G:\Projects\python-3.6.4-amd64\lib\site-packages\pyspark\rdd.py in collect(self)
    947         """
    948         with SCCallSiteSync(self.context) as css:
--> 949             sock_info = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
    950         return list(_load_from_socket(sock_info, self._jrdd_deserializer))
    951 

G:\Projects\python-3.6.4-amd64\lib\site-packages\py4j\java_gateway.py in __call__(self, *args)
   1303         answer = self.gateway_client.send_command(command)
   1304         return_value = get_return_value(
-> 1305             answer, self.gateway_client, self.target_id, self.name)
   1306 
   1307         for temp_arg in temp_args:

G:\Projects\python-3.6.4-amd64\lib\site-packages\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name)
    326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
--> 328                     format(target_id, ".", name), value)
    329             else:
    330                 raise Py4JError(

Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0) (192.168.101.68 executor driver): org.apache.spark.SparkException: Python worker failed to connect back.
	at org.apache.spark.api.python.PythonWorkerFactory.createSimpleWorker(PythonWorkerFactory.scala:182)
	at org.apache.spark.api.python.PythonWorkerFactory.create(PythonWorkerFactory.scala:107)
	at org.apache.spark.SparkEnv.createPythonWorker(SparkEnv.scala:119)
	at org.apache.spark.api.python.BasePythonRunner.compute(PythonRunner.scala:145)
	at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:65)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: java.net.SocketTimeoutException: Accept timed out
	at java.net.DualStackPlainSocketImpl.waitForNewConnection(Native Method)
	at java.net.DualStackPlainSocketImpl.socketAccept(DualStackPlainSocketImpl.java:135)
	at java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:409)
	at java.net.PlainSocketImpl.accept(PlainSocketImpl.java:199)
	at java.net.ServerSocket.implAccept(ServerSocket.java:545)
	at java.net.ServerSocket.accept(ServerSocket.java:513)
	at org.apache.spark.api.python.PythonWorkerFactory.createSimpleWorker(PythonWorkerFactory.scala:174)
	... 14 more

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2253)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2202)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2201)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2201)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1078)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1078)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1078)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2440)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2382)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2371)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:868)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2202)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2223)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2242)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2267)
	at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
	at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
	at org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:180)
	at org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.spark.SparkException: Python worker failed to connect back.
	at org.apache.spark.api.python.PythonWorkerFactory.createSimpleWorker(PythonWorkerFactory.scala:182)
	at org.apache.spark.api.python.PythonWorkerFactory.create(PythonWorkerFactory.scala:107)
	at org.apache.spark.SparkEnv.createPythonWorker(SparkEnv.scala:119)
	at org.apache.spark.api.python.BasePythonRunner.compute(PythonRunner.scala:145)
	at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:65)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:131)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	... 1 more
Caused by: java.net.SocketTimeoutException: Accept timed out
	at java.net.DualStackPlainSocketImpl.waitForNewConnection(Native Method)
	at java.net.DualStackPlainSocketImpl.socketAccept(DualStackPlainSocketImpl.java:135)
	at java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:409)
	at java.net.PlainSocketImpl.accept(PlainSocketImpl.java:199)
	at java.net.ServerSocket.implAccept(ServerSocket.java:545)
	at java.net.ServerSocket.accept(ServerSocket.java:513)
	at org.apache.spark.api.python.PythonWorkerFactory.createSimpleWorker(PythonWorkerFactory.scala:174)
	... 14 more

Solution:

Configure the following environment variables:

# Hadoop environment variable on Windows
HADOOP_HOME = F:\hadoop-common-2.2.0-bin-master\hadoop-common-2.2.0-bin-master

# JDK environment variable on Windows
JAVA_HOME = F:\jdk-8u121-windows-x64_8.0.1210.13

# PySpark environment variables on Windows
PYSPARK_DRIVER_PYTHON = jupyter
PYSPARK_DRIVER_PYTHON_OPTS = notebook
PYSPARK_PYTHON = python

Remember to restart the computer after the configuration is completed!

[Solved] Python matplotlib Error: RuntimeError: In set_size: Could not set the fontsize…

Problem
Error when saving an image: RuntimeError: In set_size: Could not set the fontsize

Traceback (most recent call last):
  File "/Users/robin/MLcode/Pycharm_Project/tensorflow/2021/0823_face_recognition_environment/0827_img_quality_analysis_v4.py", line 1556, in <module>
    image_cluster_analysis()
  File "/Users/robin/MLcode/Pycharm_Project/tensorflow/2021/0823_face_recognition_environment/0827_img_quality_analysis_v4.py", line 1549, in image_cluster_analysis
    image_showing(img_compressed)
  File "/Users/robin/MLcode/Pycharm_Project/tensorflow/2021/0823_face_recognition_environment/0827_img_quality_analysis_v4.py", line 1408, in image_showing
    plt.savefig(img_name)
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/pyplot.py", line 722, in savefig
    res = fig.savefig(*args, **kwargs)
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/figure.py", line 2180, in savefig
    self.canvas.print_figure(fname, **kwargs)
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/backend_bases.py", line 2082, in print_figure
    **kwargs)
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/backends/backend_agg.py", line 579, in print_jpg
    buf, size = self.print_to_buffer()
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/backends/backend_agg.py", line 535, in print_to_buffer
    FigureCanvasAgg.draw(self)
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/backends/backend_agg.py", line 388, in draw
    self.figure.draw(self.renderer)
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/artist.py", line 38, in draw_wrapper
    return draw(artist, renderer, *args, **kwargs)
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/figure.py", line 1709, in draw
    renderer, self, artists, self.suppressComposite)
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/image.py", line 135, in _draw_list_compositing_images
    a.draw(renderer)
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/artist.py", line 38, in draw_wrapper
    return draw(artist, renderer, *args, **kwargs)
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/axes/_base.py", line 2607, in draw
    self._update_title_position(renderer)
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/axes/_base.py", line 2556, in _update_title_position
    if title.get_window_extent(renderer).ymin < top:
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/text.py", line 890, in get_window_extent
    bbox, info, descent = self._get_layout(self._renderer)
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/text.py", line 291, in _get_layout
    ismath="TeX" if self.get_usetex() else False)
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/backends/backend_agg.py", line 210, in get_text_width_height_descent
    font = self._get_agg_font(prop)
  File "/Users/robin/software/anaconda3/envs/tensorflow/lib/python3.6/site-packages/matplotlib/backends/backend_agg.py", line 250, in _get_agg_font
    font.set_size(size, self.dpi)
RuntimeError: In set_size: Could not set the fontsize

Solution:

The source of the problem is the combination of figsize and dpi:

The following two snippets do not behave the same way:

figure_size = (6.40, 4.80)
plt.figure(figsize=figure_size, dpi=100)

And:

figure_size = (640, 480)
plt.figure(figsize=figure_size, dpi=1)

Note:

figsize : width, height in inches, default: (6.4, 4.8), that is, the picture size in inches. The default value in Matplotlib is (6.4, 4.8). dpi : dots (or pixels) per inch, default: 100.0, that is, the number of pixels per inch. The default value is 100.

Although the above two representations seem to be the same, an error is reported by using DPI = 1. (I won’t delve into it for the time being)

[Perfectly Solved] AttributeError: module 'scipy.misc' has no attribute 'toimage'

Problem Description:

While following an introductory MNIST machine learning course, the error AttributeError: module 'scipy.misc' has no attribute 'toimage' occurs when converting a NumPy array into a picture and saving it.
The code being executed is as follows:

# Save the first 20 images
for i in range(20):
    image_array = train_images[i]
    #Save file as the format:mnist_train_0.jpg,mnist_train_1.jpg,...
    filename = save_dir + '/mnist_train_%d.jpg' %i
    #save image_array to image
    # use scipy.misc.toimage to convert to image and save
    scipy.misc.toimage(image_array, cmin=0.0, cmax=1.0).save(filename)

Root cause:
I searched online for uses of the toimage() function under Python 3 and found that this function has been deprecated and is no longer available in recent SciPy releases. Many tutorials recommend downgrading the third-party SciPy package so that toimage() can still be used. I feel that this is a step backwards and does not fundamentally solve the problem. Moreover, libraries inevitably keep evolving, so it is better to follow the trend and move to a supported API.

See the latest official documentation for scipy.misc.toimage.

Solution:

Reframe the problem: how do we save a NumPy array as an image?

Method 1: use the imwrite() function of the cv2 module to convert the NumPy array into an image and save it. The code is as follows:

cv2.imwrite(filename, image_array)  #Mutual transformation of images and numpy arrays using cv2

Method 2: use the Image.fromarray() function of the PIL module to convert the NumPy array into an image and save it. The code is as follows:

from PIL import Image
Image.fromarray((image_array)).save(filename)  #Mutual transformation of images and numpy arrays using PIL
# or
Image.fromarray((image_array*255).astype('uint8'), mode='L').convert('RGB').save(filename)  # For processing color images

Method 3: use the Matplotlib module to convert the NumPy array into a picture and save it (two approaches). (1) Recommended: use the imsave() function of the matplotlib.image module to convert the NumPy array into an image and save it. The code is as follows:

from matplotlib import image
image.imsave(filename,image_array,cmap='gray')  # cmap is often used to change the drawing style, such as black and white gray, emerald green virdidis

(2) Not recommended: use the savefig() function of matplotlib.pyplot to convert the NumPy array into a picture and save it. The generated picture also contains the coordinate axes and border. The code is as follows:

import matplotlib.pyplot as plt
# Drawing pictures
plt.imshow(image_array,cmap='gray')
# save image
plt.savefig(filename) # In this case, the picture variable is already specified when drawing the picture, so there is no need to specify it again when saving

[Solved] PyTorch Caught RuntimeError in DataLoader worker process 0 and invalid argument 0: Sizes of tensors must match

The error is as follows:

Traceback (most recent call last):
  File "/home/jiang/miniconda3/envs/Net/lib/python3.6/site-packages/tqdm/std.py", line 1178, in __iter__
    for obj in iterable:
  File "/home/jiang/miniconda3/envs/Net/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 819, in __next__
    return self._process_data(data)
  File "/home/jiang/miniconda3/envs/Net/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 846, in _process_data
    data.reraise()
  File "/home/jiang/miniconda3/envs/Net/lib/python3.6/site-packages/torch/_utils.py", line 369, in reraise
    raise self.exc_type(msg)
RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/jiang/miniconda3/envs/Net/lib/python3.6/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/jiang/miniconda3/envs/Net/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
    return self.collate_fn(data)
  File "/home/jiang/miniconda3/envs/Net/lib/python3.6/site-packages/torch/utils/data/_utils/collate.py", line 75, in default_collate
    return {key: default_collate([d[key] for d in batch]) for key in elem}
  File "/home/jiang/miniconda3/envs/Net/lib/python3.6/site-packages/torch/utils/data/_utils/collate.py", line 75, in <dictcomp>
    return {key: default_collate([d[key] for d in batch]) for key in elem}
  File "/home/jiang/miniconda3/envs/Net/lib/python3.6/site-packages/torch/utils/data/_utils/collate.py", line 65, in default_collate
    return default_collate([torch.as_tensor(b) for b in batch])
  File "/home/jiang/miniconda3/envs/Net/lib/python3.6/site-packages/torch/utils/data/_utils/collate.py", line 56, in default_collate
    return torch.stack(batch, 0, out=out)
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 8 and 16 in dimension 1 at /pytorch/aten/src/TH/generic/THTensor.cpp:689

The __getitem__ function does return the data, so the problem lies in torch.utils.data.DataLoader.

Analysis

In fact, there are two errors here. The first one is:

RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 8 and 16 in dimension 1 at /pytorch/aten/src/TH/generic/THTensor.cpp:689

This message reports inconsistent data dimensions. Jump to File "/home/jiang/miniconda3/envs/Net/lib/python3.6/site-packages/torch/utils/data/_utils/collate.py", line 56, in default_collate: return torch.stack(batch, 0, out=out). The source at that location is:

  if isinstance(elem, torch.Tensor):
   out = None
   if torch.utils.data.get_worker_info() is not None:
       # If we're in a background process, concatenate directly into a
       # shared memory tensor to avoid an extra copy
       numel = sum([x.numel() for x in batch])
       storage = elem.storage()._new_shared(numel)
       out = elem.new(storage)
   return torch.stack(batch, 0, out=out)

As the code shows, the DataLoader has to merge the samples at the end. If a batch size is set, this is the step that stacks the samples into a batch. If their dimensions are not the same, an error is reported.

The other error appears because multi-process data loading is enabled (num_workers != 0); it reports which worker process has the problem. Because the samples being merged into a batch have different dimensions, the first worker fails (worker process 0), so RuntimeError: Caught RuntimeError in DataLoader worker process 0. is reported.

Solution:

Since the dimensions are not consistent, the fix is to make sure they are the same. You can allocate a large enough array or tensor in advance and mark the unfilled part. When you read the data, you can determine the valid data according to the mark.