1 year ago
#385393

Rajesh Kontham
Python Runtime Error during enumerating 'DataLoader' in for loop
I am new to python and torch. Please advice.
So, I was trying to enumerating a dataloader to try getting images in batches for DL in GAN. But this was leading the function to have 'Runtime Error' at some multiprocessing and asking to add 'Freeze support', which I did. But still, same error was occurring. Please check the following code:
For Dataset:
import glob
import random
from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms as transforms
class ImageDataset(Dataset):
def __init__(self, root_hr, root_lr, hr_shape):
hr_height, hr_width = hr_shape
# Low Resolution Images
self.tensor_transform_LR = transforms.Compose(
[
transforms.Resize((hr_height // 4, hr_width // 4), Image.BICUBIC),
transforms.ToTensor(),
]
)
# High Resolution Images
self.tensor_transform_HR = transforms.Compose(
[
transforms.Resize((hr_height , hr_width ), Image.BICUBIC),
transforms.ToTensor(),
]
)
self.fileList_hr = sorted(glob.glob(root_hr + "\*.*"))
self.fileList_lr = sorted(glob.glob(root_lr + "\*.*"))
if len(self.fileList_hr) != len(self.fileList_lr):
print(' 4 Frame and 16 Frame image count is different' )
exit()
def __getitem__(self, index):
print('Dataset index : ' + str(index))
img_input_hr = Image.open(self.fileList_hr[index % len ( self.fileList_hr)] )
img_input_lr = Image.open(self.fileList_lr[index % len ( self.fileList_lr)] )
img_tensor_lr = self.tensor_transform_LR(img_input_lr)
img_tensor_hr = self.tensor_transform_HR(img_input_hr)
#img_output = img_tensor.unsqueeze(0)
return img_tensor_lr, img_tensor_hr
def __len__(self):
return len(self.fileList_hr)
Then I have following code where I am trying to get images in batches using for loop.
import os
import torch.nn as nn
import torch.nn.functional as F
import torch
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torch import multiprocessing
from im_datasets import ImageDataset
import itertools
import torchvision.transforms as T
import numpy as np
from torchvision.utils import save_image
def run():
mp.freeze_support()
print('Freeze Loop')
if __name__ == '__name__':
run()
strDirPath = os.path.dirname(os.path.realpath(__file__))
#print(strDirPath)
os.makedirs("images/training", exist_ok = True)
os.makedirs("save_models", exist_ok = True)
dataloader = DataLoader(
ImageDataset("%s\\Data\\DeNoise16" %strDirPath,
"%s\\Data\\Noisy4" %strDirPath,
hr_shape = (128, 128)),
batch_size = 4,
shuffle = True,
num_workers = 4,
)
intLenOfDataset = int(len(dataloader.dataset))
print('Length of dataset: ' + str(intLenOfDataset))
b = True
if b:
# for epoch in range(10):
# running_loss=0.0
#get the data
for batch_num, (lq_batch,hq_batch) in enumerate(dataloader):
print(batch_num, hq_batch.shape, lq_batch.shape)
For loop shown again to easy view.
for batch_num, (lq_batch,hq_batch) in enumerate(dataloader):
print(batch_num, hq_batch.shape, lq_batch.shape)
Then following output was given:
Length of dataset: 450
Length of dataset: 450
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\spawn.py", line 125, in _main
prepare(preparation_data)
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\spawn.py", line 236, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
main_content = runpy.run_path(main_path,
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\runpy.py", line 265, in run_path
return _run_module_code(code, init_globals, run_name,
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\runpy.py", line 97, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "D:\Kontham\Projects\ReMeasure\Dynamic\Python\20220401\test.py", line 60, in <module>
for batch_num, (lq_batch,hq_batch) in enumerate(dataloader):
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\site-packages\torch\utils\data\dataloader.py", line 291, in __iter__
return _MultiProcessingDataLoaderIter(self)
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\site-packages\torch\utils\data\dataloader.py", line 737, in __init__
w.start()
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\popen_spawn_win32.py", line 45, in __init__
prep_data = spawn.get_preparation_data(process_obj._name)
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\spawn.py", line 154, in get_preparation_data
_check_not_importing_main()
File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\spawn.py", line 134, in _check_not_importing_main
raise RuntimeError(
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
Then I tried to change the 'for loop' command as by enumerating dataset of dataloader, rather than dataloader itself.
for batch_num, (lq_batch,hq_batch) in enumerate(dataloader.dataset):
print(batch_num, hq_batch.shape, lq_batch.shape)
Then the for loop didn't end and the shape of the batches is wrong. I am only getting a single image of either folders and not a batch of 4 images from each folder.
.
.
207 torch.Size([3, 128, 128]) torch.Size([3, 32, 32])
Dataset index : 208
208 torch.Size([3, 128, 128]) torch.Size([3, 32, 32])
Dataset index : 209
209 torch.Size([3, 128, 128]) torch.Size([3, 32, 32]
.
.
What I want for training images would be batches of images. Like:
.
.
Dataset index : 112
112 torch.Size([4,3, 128, 128]) torch.Size([4,3, 32, 32]
Since, I have 450 images in either folder and batch size of 4 450 / 4 ~= 112.xxx -> 113 And each batch should have 4 images.
Sorry for making a big post, but i wanted to be as descriptive as possible.
python
pytorch
pytorch-dataloader
0 Answers
Your Answer