Rajesh Kontham

Python RuntimeError when enumerating a 'DataLoader' in a for loop

I am new to Python and PyTorch. Please advise.

I was trying to enumerate a DataLoader to get images in batches for training a GAN. This caused a 'RuntimeError' somewhere in multiprocessing, asking me to add 'freeze_support', which I did. But the same error still occurs. Please check the following code:

The dataset code (im_datasets.py):

import glob
import random 

from torch.utils.data   import Dataset
from PIL                import Image

import torchvision.transforms as transforms

class ImageDataset(Dataset):
    def __init__(self, root_hr, root_lr,  hr_shape):
        hr_height, hr_width = hr_shape
        # Low Resolution Images
        self.tensor_transform_LR = transforms.Compose(
            [
                transforms.Resize((hr_height // 4, hr_width // 4), Image.BICUBIC),
                transforms.ToTensor(),
            ]
        )
        # High Resolution Images
        self.tensor_transform_HR = transforms.Compose(
            [
                transforms.Resize((hr_height, hr_width), Image.BICUBIC),
                transforms.ToTensor(),
            ]
        )
        
        self.fileList_hr = sorted(glob.glob(root_hr + "\*.*"))
        self.fileList_lr = sorted(glob.glob(root_lr + "\*.*"))

        if len(self.fileList_hr) != len(self.fileList_lr):
            print('4 Frame and 16 Frame image count is different')
            exit()

        
    def __getitem__(self, index):
        print('Dataset index : ' + str(index))

        img_input_hr = Image.open(self.fileList_hr[index % len(self.fileList_hr)])
        img_input_lr = Image.open(self.fileList_lr[index % len(self.fileList_lr)])

        img_tensor_lr = self.tensor_transform_LR(img_input_lr)
        img_tensor_hr = self.tensor_transform_HR(img_input_hr)

        return img_tensor_lr, img_tensor_hr
        
    def __len__(self):
        return len(self.fileList_hr)
        

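For reference, indexing the dataset directly should return a single (LR, HR) tensor pair, so a minimal sketch like the following can sanity-check the transforms before the DataLoader gets involved (the folder paths here are placeholders for my real data folders):

from im_datasets import ImageDataset

# placeholder paths; root_hr comes first, then root_lr
dataset = ImageDataset("Data\\DeNoise16", "Data\\Noisy4", hr_shape=(128, 128))

lr, hr = dataset[0]
print(lr.shape)  # expected: torch.Size([3, 32, 32])
print(hr.shape)  # expected: torch.Size([3, 128, 128])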
Then I have the following code (test.py, per the traceback below), where I try to get images in batches using a for loop.

import os
import torch.nn as nn
import torch.nn.functional as F
import torch

from torch.utils.data   import DataLoader
from torch.autograd     import Variable
from torch import multiprocessing 

from im_datasets import ImageDataset

import itertools
import torchvision.transforms as T
import numpy as np
from torchvision.utils import save_image


def run():
    multiprocessing.freeze_support()
    print('Freeze Loop')

if __name__ == '__main__':
    run()
    

strDirPath = os.path.dirname(os.path.realpath(__file__))
#print(strDirPath)

os.makedirs("images/training",  exist_ok = True)
os.makedirs("save_models",      exist_ok = True)

dataloader  = DataLoader(
        ImageDataset("%s\\Data\\DeNoise16" %strDirPath,
            "%s\\Data\\Noisy4" %strDirPath,
            hr_shape = (128, 128)), 
        batch_size  = 4, 
        shuffle     = True, 
        num_workers = 4,
        )
        
intLenOfDataset = int(len(dataloader.dataset))
print('Length of dataset: ' + str(intLenOfDataset))

b = True

if b:
    # for epoch in range(10):
    #     running_loss = 0.0

    # get the data
    for batch_num, (lq_batch, hq_batch) in enumerate(dataloader):
        print(batch_num, hq_batch.shape, lq_batch.shape)

The for loop, shown again for easier viewing:

    for batch_num, (lq_batch, hq_batch) in enumerate(dataloader):
        print(batch_num, hq_batch.shape, lq_batch.shape)

Then the following output was given:

Length of dataset: 450
Length of dataset: 450
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\spawn.py", line 125, in _main
    prepare(preparation_data)
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\spawn.py", line 236, in prepare
    _fixup_main_from_path(data['init_main_from_path'])
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
    main_content = runpy.run_path(main_path,
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\runpy.py", line 265, in run_path
    return _run_module_code(code, init_globals, run_name,
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\runpy.py", line 97, in _run_module_code
    _run_code(code, mod_globals, init_globals,
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "D:\Kontham\Projects\ReMeasure\Dynamic\Python\20220401\test.py", line 60, in <module>
    for batch_num, (lq_batch,hq_batch) in enumerate(dataloader):
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\site-packages\torch\utils\data\dataloader.py", line 291, in __iter__
    return _MultiProcessingDataLoaderIter(self)
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\site-packages\torch\utils\data\dataloader.py", line 737, in __init__
    w.start()
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\process.py", line 121, in start
    self._popen = self._Popen(self)
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\context.py", line 224, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\context.py", line 327, in _Popen
    return Popen(process_obj)
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\popen_spawn_win32.py", line 45, in __init__
    prep_data = spawn.get_preparation_data(process_obj._name)
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\spawn.py", line 154, in get_preparation_data
    _check_not_importing_main()
  File "D:\Kontham\Softwares\Anaconda\envs\py38\lib\multiprocessing\spawn.py", line 134, in _check_not_importing_main
    raise RuntimeError( 

        RuntimeError:
        An attempt has been made to start a new process before the
        current process has finished its bootstrapping phase.

        This probably means that you are not using fork to start your
        child processes and you have forgotten to use the proper idiom
        in the main module:

            if __name__ == '__main__':
                freeze_support()
                ...

        The "freeze_support()" line can be omitted if the program
        is not going to be frozen to produce an executable.
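If I read this message correctly, the spawn start method used on Windows re-imports the main module in every worker process, so not just freeze_support() but everything that builds and iterates the DataLoader has to sit under the if __name__ == '__main__': guard. A sketch of how I understand test.py would need to be restructured (my understanding only, not verified):

import os

from torch.utils.data import DataLoader

from im_datasets import ImageDataset


def main():
    strDirPath = os.path.dirname(os.path.realpath(__file__))

    dataloader = DataLoader(
        ImageDataset("%s\\Data\\DeNoise16" % strDirPath,
                     "%s\\Data\\Noisy4" % strDirPath,
                     hr_shape=(128, 128)),
        batch_size=4,
        shuffle=True,
        num_workers=4,  # num_workers=0 would sidestep worker processes entirely
    )

    for batch_num, (lq_batch, hq_batch) in enumerate(dataloader):
        print(batch_num, hq_batch.shape, lq_batch.shape)


if __name__ == '__main__':
    # the guard keeps spawned workers from re-running this code when they import the module
    main()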

Then I tried changing the for loop to enumerate the dataset of the DataLoader, rather than the DataLoader itself:

    for batch_num, (lq_batch, hq_batch) in enumerate(dataloader.dataset):
        print(batch_num, hq_batch.shape, lq_batch.shape)

This time the for loop never ended, and the shapes are wrong: I only get a single image from each folder, not a batch of 4 images.

.
.
207 torch.Size([3, 128, 128]) torch.Size([3, 32, 32])
Dataset index : 208
208 torch.Size([3, 128, 128]) torch.Size([3, 32, 32])
Dataset index : 209
209 torch.Size([3, 128, 128]) torch.Size([3, 32, 32])
.
.

What I want for training is batches of images, like:

.
.
Dataset index : 112
112 torch.Size([4, 3, 128, 128]) torch.Size([4, 3, 32, 32])

Since I have 450 images in each folder and a batch size of 4, 450 / 4 ≈ 112.5, so I expect 113 batches, each with 4 images (except the last, which would only have 2).
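Spelling out that arithmetic as a quick check (assuming the DataLoader default of drop_last=False, so the remainder forms one final, smaller batch):

import math

num_images = 450
batch_size = 4

num_batches = math.ceil(num_images / batch_size)           # 113
last_batch  = num_images - batch_size * (num_batches - 1)  # 2 images left for the last batch
print(num_batches, last_batch)                             # 113 2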

Sorry for making a big post, but I wanted to be as descriptive as possible.

Tags: python, pytorch, pytorch-dataloader
