|
|
|
import os
|
|
|
|
import random
|
|
|
|
import cv2
|
|
|
|
import glob
|
|
|
|
import numpy as np
|
|
|
|
from PIL import Image, ImageEnhance
|
|
|
|
|
|
|
|
from megengine.data.dataset import Dataset
|
|
|
|
|
|
|
|
|
|
|
|
class Augmentor:
    """Photometric + spatial augmentation for a stereo pair and its left disparity.

    ``__call__`` returns ``(left_img, right_img, left_disp, disp_mask)`` where
    the spatial outputs have size ``(image_height, image_width)`` and
    ``disp_mask`` is a float32 validity mask for the disparity map.
    """

    def __init__(
        self,
        image_height=384,
        image_width=512,
        max_disp=256,
        scale_min=0.6,
        scale_max=1.0,
        seed=0,
    ):
        """
        Args:
            image_height: output (crop) height in pixels.
            image_width: output (crop) width in pixels.
            max_disp: disparities >= max_disp / resize_scale are masked invalid.
            scale_min: lower bound of the random resize factor.
            scale_max: upper bound of the random resize factor.
            seed: seed for the private RandomState driving all random draws.
        """
        super().__init__()
        self.image_height = image_height
        self.image_width = image_width
        self.max_disp = max_disp
        self.scale_min = scale_min
        self.scale_max = scale_max
        self.rng = np.random.RandomState(seed)

    def chromatic_augmentation(self, img):
        """Randomly jitter brightness, contrast and gamma of a uint8 image.

        Draws from ``self.rng`` so results are reproducible for a fixed seed
        (previously used the global ``np.random`` state, which ignored the
        ``seed`` constructor argument).
        """
        random_brightness = self.rng.uniform(0.8, 1.2)
        random_contrast = self.rng.uniform(0.8, 1.2)
        random_gamma = self.rng.uniform(0.8, 1.2)

        img = Image.fromarray(img)

        enhancer = ImageEnhance.Brightness(img)
        img = enhancer.enhance(random_brightness)
        enhancer = ImageEnhance.Contrast(img)
        img = enhancer.enhance(random_contrast)

        # Per-channel gamma LUT: one 256-entry table replicated for R, G, B.
        gamma_map = [
            255 * 1.0 * pow(ele / 255.0, random_gamma) for ele in range(256)
        ] * 3
        img = img.point(gamma_map)  # use PIL's point-function to accelerate this part

        img_ = np.array(img)

        return img_

    def __call__(self, left_img, right_img, left_disp):
        """Augment one training sample.

        Args:
            left_img / right_img: HxW or HxWxC uint8 images.
            left_disp: HxW float disparity map for the left view.

        Returns:
            Tuple ``(left_img, right_img, left_disp, disp_mask)`` after
            augmentation, each of spatial size (image_height, image_width).
        """
        # 1. chromatic augmentation (currently disabled)
        # left_img = self.chromatic_augmentation(left_img)
        # right_img = self.chromatic_augmentation(right_img)

        # 2. spatial augmentation
        # 2.1) small random rotation & vertical shift of the right image only,
        # simulating imperfect rectification.
        if self.rng.binomial(1, 0.5):
            angle, pixel = 0.1, 2
            px = self.rng.uniform(-pixel, pixel)
            ag = self.rng.uniform(-angle, angle)
            # NOTE(review): center is drawn as (rows, cols) but cv2 expects
            # (x, y); harmless for a random center, but confirm intent.
            image_center = (
                self.rng.uniform(0, right_img.shape[0]),
                self.rng.uniform(0, right_img.shape[1]),
            )
            rot_mat = cv2.getRotationMatrix2D(image_center, ag, 1.0)
            right_img = cv2.warpAffine(
                right_img, rot_mat, right_img.shape[1::-1], flags=cv2.INTER_LINEAR
            )
            trans_mat = np.float32([[1, 0, 0], [0, 1, px]])
            right_img = cv2.warpAffine(
                right_img, trans_mat, right_img.shape[1::-1], flags=cv2.INTER_LINEAR
            )

        # 2.2) random resize
        resize_scale = self.rng.uniform(self.scale_min, self.scale_max)

        left_img = cv2.resize(
            left_img,
            None,
            fx=resize_scale,
            fy=resize_scale,
            interpolation=cv2.INTER_LINEAR,
        )
        if len(left_img.shape) == 2:
            # cv2.resize drops the channel axis of single-channel input;
            # restore it so downstream indexing is uniform.
            left_img.shape += 1,

        right_img = cv2.resize(
            right_img,
            None,
            fx=resize_scale,
            fy=resize_scale,
            interpolation=cv2.INTER_LINEAR,
        )
        if len(right_img.shape) == 2:
            # BUGFIX: this branch previously re-checked and re-modified
            # left_img (copy-paste), leaving a grayscale right_img without
            # its channel axis.
            right_img.shape += 1,

        # Valid pixels: positive disparity that stays below max_disp after scaling.
        disp_mask = (left_disp < float(self.max_disp / resize_scale)) & (left_disp > 0)
        disp_mask = disp_mask.astype("float32")
        disp_mask = cv2.resize(
            disp_mask,
            None,
            fx=resize_scale,
            fy=resize_scale,
            interpolation=cv2.INTER_LINEAR,
        )

        # Disparity values scale with image width.
        left_disp = (
            cv2.resize(
                left_disp,
                None,
                fx=resize_scale,
                fy=resize_scale,
                interpolation=cv2.INTER_LINEAR,
            )
            * resize_scale
        )

        # 2.3) random crop, implemented as a translating warp so that images
        # smaller than the target size get zero-padded instead of failing.
        if len(left_img.shape) == 3:
            h, w, c = left_img.shape
        else:
            h, w = left_img.shape
            c = 1
        dx = w - self.image_width
        dy = h - self.image_height
        dy = self.rng.randint(min(0, dy), max(0, dy) + 1)
        dx = self.rng.randint(min(0, dx), max(0, dx) + 1)

        M = np.float32([[1.0, 0.0, -dx], [0.0, 1.0, -dy]])
        left_img = cv2.warpAffine(
            left_img,
            M,
            (self.image_width, self.image_height),
            flags=cv2.INTER_LINEAR,
            borderValue=0,
        )
        right_img = cv2.warpAffine(
            right_img,
            M,
            (self.image_width, self.image_height),
            flags=cv2.INTER_LINEAR,
            borderValue=0,
        )
        left_disp = cv2.warpAffine(
            left_disp,
            M,
            (self.image_width, self.image_height),
            flags=cv2.INTER_LINEAR,
            borderValue=0,
        )
        disp_mask = cv2.warpAffine(
            disp_mask,
            M,
            (self.image_width, self.image_height),
            flags=cv2.INTER_LINEAR,
            borderValue=0,
        )

        # 3. random occlusion: fill a random rectangle of the right image
        # with the image's mean value.
        if self.rng.binomial(1, 0.5):
            sx = int(self.rng.uniform(50, 100))
            sy = int(self.rng.uniform(50, 100))
            cx = int(self.rng.uniform(sx, right_img.shape[0] - sx))
            cy = int(self.rng.uniform(sy, right_img.shape[1] - sy))
            right_img[cx - sx : cx + sx, cy - sy : cy + sy] = np.mean(
                np.mean(right_img, 0), 0
            )[np.newaxis, np.newaxis]

        if len(left_img.shape) == 2:
            # Replicate a grayscale left image to 3 channels for the network.
            left_img = cv2.merge([left_img, left_img, left_img])

        return left_img, right_img, left_disp, disp_mask
|
|
|
|
|
|
|
|
|
|
|
|
class CREStereoDataset(Dataset):
    """CREStereo synthetic dataset.

    Indexes ``*_left.jpg`` files under ``root``; the matching right image and
    the two disparity PNGs share the filename prefix.
    """

    def __init__(self, root):
        # Recursively collect every left image under `root`.
        super().__init__()
        self.imgs = glob.glob(os.path.join(root, "**/*_left.jpg"), recursive=True)
        self.augmentor = Augmentor(
            image_height=384,
            image_width=512,
            max_disp=256,
            scale_min=0.6,
            scale_max=1.0,
            seed=0,
        )
        # Separate RNG for the left/right swap below (fixed seed => same
        # flip sequence every epoch).
        self.rng = np.random.RandomState(0)

    def get_disp(self, path):
        """Load a disparity PNG stored as fixed-point values (disparity * 32)."""
        disp = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        return disp.astype(np.float32) / 32

    def __getitem__(self, index):
        """Return one sample dict: left / right (CHW uint8), disparity, mask."""
        # find path: siblings share the "<prefix>_" naming scheme
        left_path = self.imgs[index]
        prefix = left_path[: left_path.rfind("_")]
        right_path = prefix + "_right.jpg"
        left_disp_path = prefix + "_left.disp.png"
        right_disp_path = prefix + "_right.disp.png"

        # read img, disp
        left_img = cv2.imread(left_path, cv2.IMREAD_COLOR)
        right_img = cv2.imread(right_path, cv2.IMREAD_COLOR)
        left_disp = self.get_disp(left_disp_path)
        right_disp = self.get_disp(right_disp_path)

        # With p=0.5, swap the views and mirror them horizontally (a valid
        # stereo pair again, with roles exchanged).
        if self.rng.binomial(1, 0.5):
            left_img, right_img = np.fliplr(right_img), np.fliplr(left_img)
            left_disp, right_disp = np.fliplr(right_disp), np.fliplr(left_disp)
        # Zero-out infinite disparities so they fail the mask's `> 0` test.
        left_disp[left_disp == np.inf] = 0

        # augmentation
        # NOTE(review): the augmented images are deliberately discarded here
        # (see the commented-out original line) — the returned left/right are
        # NOT augmented while disparity/mask ARE resized+cropped to 384x512,
        # so their spatial sizes can disagree. Confirm this is intended.
        # left_img, right_img, left_disp, disp_mask = self.augmentor(
        _, _, left_disp, disp_mask = self.augmentor(
            left_img, right_img, left_disp
        )

        # HWC -> CHW for the framework's tensor layout.
        left_img = left_img.transpose(2, 0, 1).astype("uint8")
        right_img = right_img.transpose(2, 0, 1).astype("uint8")

        return {
            "left": left_img,
            "right": right_img,
            "disparity": left_disp,
            "mask": disp_mask,
        }

    def __len__(self):
        return len(self.imgs)
|
|
|
|
|
|
|
|
|
|
|
|
class CTDDataset(Dataset):
    """Structured-light dataset: captured image vs. fixed projector pattern.

    Each sample pairs an ``im0_*.npy`` image (used as the "left" view) with
    the projector pattern image (the "right" view) and the disparity map
    stored in the sibling ``disp0_*.npy``.
    """

    def __init__(self, root, pattern_path: str, data_type: str = 'syn', augment=True, resize_pattern=True, blur=False, split=0.9, test_set=False):
        # Layout assumed: <root>/<data_type>/*/im0_*.npy — TODO confirm.
        super().__init__()
        self.rng = np.random.RandomState(0)
        self.augment = augment
        self.blur = blur
        imgs = glob.glob(os.path.join(root, f"{data_type}/*/im0_*.npy"), recursive=True)
        # NOTE(review): the *test* set receives the FIRST `split` fraction
        # (default 90%) and training gets the remainder — this looks swapped
        # relative to the usual convention; confirm intent.
        if test_set:
            self.imgs = imgs[:int(split * len(imgs))]
        else:
            self.imgs = imgs[int(split * len(imgs)):]
        self.pattern = cv2.imread(pattern_path)#, cv2.IMREAD_GRAYSCALE)

        if resize_pattern and self.pattern.shape != (480, 640, 3):
            # self.pattern = cv2.resize(self.pattern, (640, 480))
            print(self.pattern.shape)
            # pyrDown halves the resolution; then center-crop rows to 480.
            downsampled = cv2.pyrDown(self.pattern)
            diff = (downsampled.shape[0] - 480) // 2
            self.pattern = downsampled[diff:downsampled.shape[0]-diff, 0:downsampled.shape[1]]

        self.augmentor = Augmentor(
            image_height=480,
            image_width=640,
            max_disp=256,
            scale_min=0.6,
            scale_max=1.0,
            seed=0,
        )

    def get_disp(self, path):
        """Load a disparity map stored as a CHW .npy array; returns it HWC."""
        # disp = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        disp = np.load(path).transpose(1,2,0)
        # return disp.astype(np.float32) / 32
        return disp

    def __getitem__(self, index):
        """Return one sample dict: left / right / disparity / mask."""
        # find path: disparity file shares the image name with 'im' -> 'disp'
        left_path = self.imgs[index]
        left_disp_path = left_path.replace('im', 'disp')

        # read img, disp
        left_img = np.load(left_path)

        # Captures stored as float32 in [0, 1] are rescaled to uint8.
        if left_img.dtype == 'float32':
            left_img = (left_img * 255).astype('uint8')

        # Replicate the single-channel capture to a 3-channel 480x640 image.
        left_img = cv2.merge([left_img, left_img, left_img]).reshape((480, 640, 3))

        right_img = self.pattern
        left_disp = self.get_disp(left_disp_path)

        # Horizontal-flip swap augmentation: intentionally disabled.
        if False: # self.rng.binomial(1, 0.5):
            left_img, right_img = np.fliplr(right_img), np.fliplr(left_img)
            left_disp, right_disp = np.fliplr(right_disp), np.fliplr(left_disp)
        # Zero-out infinite disparities so they fail the mask's `> 0` test.
        left_disp[left_disp == np.inf] = 0

        if self.blur:
            # Random Gaussian blur with an odd kernel size (1 = no-op).
            kernel_size = random.sample([1,3,5,7,9], 1)[0]
            kernel = (kernel_size, kernel_size)
            left_img = cv2.GaussianBlur(left_img, kernel, 0)

        # Augmentation: the augmentor runs in both branches so disp_mask is
        # always defined; with augment=False its image/disparity outputs are
        # discarded and the raw sample is returned with the augmented mask.
        # NOTE(review): in that case mask and disparity come from different
        # spatial transforms — confirm intent.
        if not self.augment:
            _left_img, _right_img, _left_disp, disp_mask = self.augmentor(
                left_img, right_img, left_disp
            )
        else:
            left_img, right_img, left_disp, disp_mask = self.augmentor(
                left_img, right_img, left_disp
            )

        # NOTE(review): only right_img is converted to CHW here; left_img is
        # returned HWC — confirm downstream expects this asymmetry.
        right_img = right_img.transpose((2, 0, 1)).astype("uint8")

        return {
            "left": left_img,
            "right": right_img,
            "disparity": left_disp,
            "mask": disp_mask,
        }

    def __len__(self):
        return len(self.imgs)
|
|
|
|
|
|
|
|
|
|
|
|
class BlenderDataset(CTDDataset):
    """Structured-light dataset rendered with Blender.

    Left views are rendered images (``im_*.png``); the "right" view is the
    fixed projector pattern; disparity is derived from the rendered depth map
    (``*_depth0001.png``) via the pinhole relation disp = baseline * f / depth.
    """

    def __init__(self, root, pattern_path: str, data_type: str = 'syn', augment=True, resize_pattern=True, blur=False, split=0.9, test_set=False, use_lightning=False):
        """
        Args mirror CTDDataset; ``use_lightning`` switches ``__getitem__`` to
        return a (left, right, disparity, mask) tuple instead of a dict.
        """
        # BUGFIX: previously only (root, pattern_path) were forwarded, so
        # data_type / augment / resize_pattern / blur / split / test_set were
        # silently reset to the CTDDataset defaults regardless of what the
        # caller passed (e.g. blur=True never took effect).
        super().__init__(
            root,
            pattern_path,
            data_type=data_type,
            augment=augment,
            resize_pattern=resize_pattern,
            blur=blur,
            split=split,
            test_set=test_set,
        )
        self.use_lightning = use_lightning
        # Rendered images live flat under root; the depth renders share the
        # basename and are excluded from the image list.
        imgs = [f for f in glob.glob(f"{root}/im_*.png", recursive=True) if not 'depth0001' in f]
        # NOTE(review): as in CTDDataset, the test set gets the FIRST `split`
        # fraction of the files — confirm the intended split convention.
        if test_set:
            self.imgs = imgs[:int(split * len(imgs))]
        else:
            self.imgs = imgs[int(split * len(imgs)):]
        self.pattern = cv2.imread(pattern_path)#, cv2.IMREAD_GRAYSCALE)

        if resize_pattern and self.pattern.shape != (480, 640, 3):
            # pyrDown halves the resolution; then center-crop rows to 480.
            downsampled = cv2.pyrDown(self.pattern)
            diff = (downsampled.shape[0] - 480) // 2
            self.pattern = downsampled[diff:downsampled.shape[0]-diff, 0:downsampled.shape[1]]

        self.augmentor = Augmentor(
            image_height=480,
            image_width=640,
            max_disp=256,
            scale_min=0.6,
            scale_max=1.0,
            seed=0,
        )

    def __getitem__(self, index):
        """Return one sample as a dict (default) or tuple (use_lightning)."""
        # find path: the depth render shares the image basename
        left_path = self.imgs[index]
        left_disp_path = left_path.split('.')[0] + '_depth0001.png'

        # read img, disp
        left_img = cv2.imread(left_path)

        # Renders stored as float32 in [0, 1] are rescaled to uint8.
        if left_img.dtype == 'float32':
            left_img = (left_img * 255).astype('uint8')

        if left_img.shape != (480, 640, 3):
            # Bring the render to working resolution: pyrDown halves it,
            # then center-crop rows to 480.
            downsampled = cv2.pyrDown(left_img)
            diff = (downsampled.shape[0] - 480) // 2
            left_img = downsampled[diff:downsampled.shape[0]-diff, 0:downsampled.shape[1]]
        if left_img.shape[-1] != 3:
            # Replicate single-channel renders to 3 channels.
            left_img = cv2.merge([left_img, left_img, left_img]).reshape((480, 640, 3))

        right_img = self.pattern
        left_disp = self.get_disp(left_disp_path)

        # Zero-out infinite disparities so they fail the mask's `> 0` test.
        # (The disabled flip/swap branch from CTDDataset was removed — it was
        # dead code referencing an undefined right_disp.)
        left_disp[left_disp == np.inf] = 0

        if self.blur:
            # Random Gaussian blur with an odd kernel size (1 = no-op).
            kernel_size = random.sample([1, 3, 5, 7, 9], 1)[0]
            kernel = (kernel_size, kernel_size)
            left_img = cv2.GaussianBlur(left_img, kernel, 0)

        # Augmentation: always run the augmentor so disp_mask is defined;
        # with augment=False the raw sample is kept and only the mask is used.
        if not self.augment:
            _left_img, _right_img, _left_disp, disp_mask = self.augmentor(
                left_img, right_img, left_disp
            )
        else:
            left_img, right_img, left_disp, disp_mask = self.augmentor(
                left_img, right_img, left_disp
            )

        if not self.use_lightning:
            # Dict format: images stay HWC here.
            return {
                "left": left_img,
                "right": right_img,
                "disparity": left_disp,
                "mask": disp_mask,
            }

        # Lightning format: CHW uint8 tensors in a plain tuple.
        right_img = right_img.transpose((2, 0, 1)).astype("uint8")
        left_img = left_img.transpose((2, 0, 1)).astype("uint8")
        return left_img, right_img, left_disp, disp_mask

    def get_disp(self, path):
        """Convert a rendered depth map to disparity at 480-row resolution.

        disp = baseline * focal_length / depth, scaled by an empirical factor.
        """
        baseline = 0.075  # meters
        fl = 560.  # focal length in pixels, as per CTD
        depth = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        # Downsample + center-crop rows to match the image preprocessing.
        downsampled = cv2.pyrDown(depth)
        diff = (downsampled.shape[0] - 480) // 2
        depth = downsampled[diff:downsampled.shape[0]-diff, 0:downsampled.shape[1]]
        # FIXME temporarily increase disparity until new data with better depth values is generated
        # higher values seem to speedup convergence, but introduce much stronger artifacting
        mystery_factor = 150
        disp = (baseline * fl * mystery_factor) / depth
        return disp.astype(np.float32)
|