Spaces:

ajayhk
/

JPEGArtifactRemover

Runtime error

App Files Files Community

Ajay Harikumar commited on Oct 4, 2022

Commit

feff774

1 Parent(s): 8ee7972

Add initial versions of files without resolution constrain

Browse files

Files changed (5) hide show

app.py +181 -0
network_fbcnn.py +337 -0
packages.txt +1 -0
requirements.txt +3 -0
utils_image.py +999 -0

app.py ADDED Viewed

	@@ -0,0 +1,181 @@

+import gradio as gr
+import os.path
+import numpy as np
+from collections import OrderedDict
+import torch
+import cv2
+from PIL import Image, ImageOps
+import utils_image as util
+from network_fbcnn import FBCNN as net
+import requests
+import datetime
+for model_path in ['fbcnn_gray.pth','fbcnn_color.pth']:
+    if os.path.exists(model_path):
+        print(f'{model_path} exists.')
+    else:
+        print("downloading model")
+        url = 'https://github.com/jiaxi-jiang/FBCNN/releases/download/v1.0/{}'.format(os.path.basename(model_path))
+        r = requests.get(url, allow_redirects=True)
+        open(model_path, 'wb').write(r.content)
+def inference(input_img, is_gray, input_quality, zoom, x_shift, y_shift):
+    print("datetime:",datetime.datetime.utcnow())
+    input_img_width, input_img_height = Image.fromarray(input_img).size
+    print("img size:",(input_img_width,input_img_height))
+    if (input_img_width > 1080) or (input_img_height > 1080):
+        resize_ratio = min(1080/input_img_width, 1080/input_img_height)
+        resized_input = Image.fromarray(input_img).resize((int(input_img_width*resize_ratio)+(input_img_width*resize_ratio < 1),
+                                                           int(input_img_height*resize_ratio)+(input_img_height*resize_ratio < 1)),
+                                                          resample=Image.BICUBIC)
+        input_img = np.array(resized_input)
+        print("input image resized to:", resized_input.size)
+    if is_gray:
+        n_channels = 1 # set 1 for grayscale image, set 3 for color image
+        model_name = 'fbcnn_gray.pth'
+    else:
+        n_channels = 3 # set 1 for grayscale image, set 3 for color image
+        model_name = 'fbcnn_color.pth'
+    nc = [64,128,256,512]
+    nb = 4
+    input_quality = 100 - input_quality
+    model_path = model_name
+    if os.path.exists(model_path):
+        print(f'{model_path} already exists.')
+    else:
+        print("downloading model")
+        os.makedirs(os.path.dirname(model_path), exist_ok=True)
+        url = 'https://github.com/jiaxi-jiang/FBCNN/releases/download/v1.0/{}'.format(os.path.basename(model_path))
+        r = requests.get(url, allow_redirects=True)
+        open(model_path, 'wb').write(r.content)
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    print("device:",device)
+    # ----------------------------------------
+    # load model
+    # ----------------------------------------
+    print(f'loading model from {model_path}')
+    model = net(in_nc=n_channels, out_nc=n_channels, nc=nc, nb=nb, act_mode='R')
+    print("#model.load_state_dict(torch.load(model_path), strict=True)")
+    model.load_state_dict(torch.load(model_path), strict=True)
+    print("#model.eval()")
+    model.eval()
+    print("#for k, v in model.named_parameters()")
+    for k, v in model.named_parameters():
+        v.requires_grad = False
+    print("#model.to(device)")
+    model = model.to(device)
+    print("Model loaded.")
+    test_results = OrderedDict()
+    test_results['psnr'] = []
+    test_results['ssim'] = []
+    test_results['psnrb'] = []
+    # ------------------------------------
+    # (1) img_L
+    # ------------------------------------
+    print("#if n_channels")
+    if n_channels == 1:
+        open_cv_image = Image.fromarray(input_img)
+        open_cv_image = ImageOps.grayscale(open_cv_image)
+        open_cv_image = np.array(open_cv_image) # PIL to open cv image
+        img = np.expand_dims(open_cv_image, axis=2)  # HxWx1
+    elif n_channels == 3:
+        open_cv_image = np.array(input_img) # PIL to open cv image
+        if open_cv_image.ndim == 2:
+            open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_GRAY2RGB)  # GGG
+        else:
+            open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_BGR2RGB)  # RGB
+    print("#util.uint2tensor4(open_cv_image)")
+    img_L = util.uint2tensor4(open_cv_image)
+    print("#img_L.to(device)")
+    img_L = img_L.to(device)
+    # ------------------------------------
+    # (2) img_E
+    # ------------------------------------
+    print("#model(img_L)")
+    img_E,QF = model(img_L)
+    print("#util.tensor2single(img_E)")
+    img_E = util.tensor2single(img_E)
+    print("#util.single2uint(img_E)")
+    img_E = util.single2uint(img_E)
+    print("#torch.tensor([[1-input_quality/100]]).cuda() || torch.tensor([[1-input_quality/100]])")
+    qf_input = torch.tensor([[1-input_quality/100]]).cuda() if device == torch.device('cuda') else torch.tensor([[1-input_quality/100]])
+    print("#util.single2uint(img_E)")
+    img_E,QF = model(img_L, qf_input)
+    print("#util.tensor2single(img_E)")
+    img_E = util.tensor2single(img_E)
+    print("#util.single2uint(img_E)")
+    img_E = util.single2uint(img_E)
+    if img_E.ndim == 3:
+        img_E = img_E[:, :, [2, 1, 0]]
+    print("--inference finished")
+    out_img = Image.fromarray(img_E)
+    out_img_w, out_img_h = out_img.size # output image size
+    zoom = zoom/100
+    x_shift = x_shift/100
+    y_shift = y_shift/100
+    zoom_w, zoom_h = out_img_w*zoom, out_img_h*zoom
+    zoom_left, zoom_right = int((out_img_w - zoom_w)*x_shift), int(zoom_w + (out_img_w - zoom_w)*x_shift)
+    zoom_top, zoom_bottom = int((out_img_h - zoom_h)*y_shift), int(zoom_h + (out_img_h - zoom_h)*y_shift)
+    in_img = Image.fromarray(input_img)
+    in_img = in_img.crop((zoom_left, zoom_top, zoom_right, zoom_bottom))
+    in_img = in_img.resize((int(zoom_w/zoom), int(zoom_h/zoom)), Image.NEAREST)
+    out_img = out_img.crop((zoom_left, zoom_top, zoom_right, zoom_bottom))
+    out_img = out_img.resize((int(zoom_w/zoom), int(zoom_h/zoom)), Image.NEAREST)
+    print("--generating preview finished")
+    return img_E, in_img, out_img
+gr.Interface(
+    fn = inference,
+    inputs = [gr.inputs.Image(label="Input Image"),
+              gr.inputs.Checkbox(label="Grayscale (Check this if your image is grayscale)"),
+              gr.inputs.Slider(minimum=1, maximum=100, step=1, label="Intensity (Higher = stronger JPEG artifact removal)"),
+              gr.inputs.Slider(minimum=10, maximum=100, step=1, default=50, label="Zoom Image "
+                                                                                  "(Use this to see a copy of the output image up close. "
+                                                                                   "100 = original size)"),
+              gr.inputs.Slider(minimum=0, maximum=100, step=1, label="Zoom horizontal shift "
+                                                                     "(Increase to shift to the right)"),
+              gr.inputs.Slider(minimum=0, maximum=100, step=1, label="Zoom vertical shift "
+                                                                     "(Increase to shift downwards)")
+              ],
+    outputs = [gr.outputs.Image(label="Result"),
+               gr.outputs.Image(label="Before:"),
+               gr.outputs.Image(label="After:")],
+    title = "JPEG Artifacts Removal [FBCNN]",
+    description = "Gradio Demo for JPEG Artifacts Removal. To use it, simply upload your image, "
+                  "Check out the paper and the original GitHub repo at the links below. "
+                  "JPEG artifacts are noticeable distortions of images caused by JPEG lossy compression. "
+                  "This is not a super-resolution AI but a JPEG compression artifact remover. "
+                  "Written below are the limitations of the input image. ",
+    article = "<p style='text-align: left;'>Uploaded images with transparency will be incorrectly reconstructed at the output.</p>"
+              "<p style='text-align: center;'><a href='https://github.com/jiaxi-jiang/FBCNN'>FBCNN GitHub Repo</a><br>"
+              "<a href='https://arxiv.org/abs/2109.14573'>Towards Flexible Blind JPEG Artifacts Removal (FBCNN, ICCV 2021)</a><br>"
+              "<a href='https://jiaxi-jiang.github.io/'>Jiaxi Jiang, </a>"
+              "<a href='https://cszn.github.io/'>Kai Zhang, </a>"
+              "<a href='http://people.ee.ethz.ch/~timofter/'>Radu Timofte</a></p>",
+    allow_flagging="never"
+).launch(enable_queue=True)

network_fbcnn.py ADDED Viewed

	@@ -0,0 +1,337 @@

+from collections import OrderedDict
+import torch
+import torch.nn as nn
+import numpy as np
+import torch.nn.functional as F
+import torchvision.models as models
+'''
+# --------------------------------------------
+# Advanced nn.Sequential
+# https://github.com/xinntao/BasicSR
+# --------------------------------------------
+'''
+def sequential(*args):
+    """Advanced nn.Sequential.
+    Args:
+        nn.Sequential, nn.Module
+    Returns:
+        nn.Sequential
+    """
+    if len(args) == 1:
+        if isinstance(args[0], OrderedDict):
+            raise NotImplementedError('sequential does not support OrderedDict input.')
+        return args[0]  # No sequential is needed.
+    modules = []
+    for module in args:
+        if isinstance(module, nn.Sequential):
+            for submodule in module.children():
+                modules.append(submodule)
+        elif isinstance(module, nn.Module):
+            modules.append(module)
+    return nn.Sequential(*modules)
+# --------------------------------------------
+# return nn.Sequantial of (Conv + BN + ReLU)
+# --------------------------------------------
+def conv(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True, mode='CBR', negative_slope=0.2):
+    L = []
+    for t in mode:
+        if t == 'C':
+            L.append(nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias))
+        elif t == 'T':
+            L.append(nn.ConvTranspose2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=bias))
+        elif t == 'B':
+            L.append(nn.BatchNorm2d(out_channels, momentum=0.9, eps=1e-04, affine=True))
+        elif t == 'I':
+            L.append(nn.InstanceNorm2d(out_channels, affine=True))
+        elif t == 'R':
+            L.append(nn.ReLU(inplace=True))
+        elif t == 'r':
+            L.append(nn.ReLU(inplace=False))
+        elif t == 'L':
+            L.append(nn.LeakyReLU(negative_slope=negative_slope, inplace=True))
+        elif t == 'l':
+            L.append(nn.LeakyReLU(negative_slope=negative_slope, inplace=False))
+        elif t == '2':
+            L.append(nn.PixelShuffle(upscale_factor=2))
+        elif t == '3':
+            L.append(nn.PixelShuffle(upscale_factor=3))
+        elif t == '4':
+            L.append(nn.PixelShuffle(upscale_factor=4))
+        elif t == 'U':
+            L.append(nn.Upsample(scale_factor=2, mode='nearest'))
+        elif t == 'u':
+            L.append(nn.Upsample(scale_factor=3, mode='nearest'))
+        elif t == 'v':
+            L.append(nn.Upsample(scale_factor=4, mode='nearest'))
+        elif t == 'M':
+            L.append(nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=0))
+        elif t == 'A':
+            L.append(nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=0))
+        else:
+            raise NotImplementedError('Undefined type: '.format(t))
+    return sequential(*L)
+# --------------------------------------------
+# Res Block: x + conv(relu(conv(x)))
+# --------------------------------------------
+class ResBlock(nn.Module):
+    def __init__(self, in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True, mode='CRC', negative_slope=0.2):
+        super(ResBlock, self).__init__()
+        assert in_channels == out_channels, 'Only support in_channels==out_channels.'
+        if mode[0] in ['R', 'L']:
+            mode = mode[0].lower() + mode[1:]
+        self.res = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode, negative_slope)
+    def forward(self, x):
+        res = self.res(x)
+        return x + res
+# --------------------------------------------
+# conv + subp (+ relu)
+# --------------------------------------------
+def upsample_pixelshuffle(in_channels=64, out_channels=3, kernel_size=3, stride=1, padding=1, bias=True, mode='2R', negative_slope=0.2):
+    assert len(mode)<4 and mode[0] in ['2', '3', '4'], 'mode examples: 2, 2R, 2BR, 3, ..., 4BR.'
+    up1 = conv(in_channels, out_channels * (int(mode[0]) ** 2), kernel_size, stride, padding, bias, mode='C'+mode, negative_slope=negative_slope)
+    return up1
+# --------------------------------------------
+# nearest_upsample + conv (+ R)
+# --------------------------------------------
+def upsample_upconv(in_channels=64, out_channels=3, kernel_size=3, stride=1, padding=1, bias=True, mode='2R', negative_slope=0.2):
+    assert len(mode)<4 and mode[0] in ['2', '3', '4'], 'mode examples: 2, 2R, 2BR, 3, ..., 4BR'
+    if mode[0] == '2':
+        uc = 'UC'
+    elif mode[0] == '3':
+        uc = 'uC'
+    elif mode[0] == '4':
+        uc = 'vC'
+    mode = mode.replace(mode[0], uc)
+    up1 = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode=mode, negative_slope=negative_slope)
+    return up1
+# --------------------------------------------
+# convTranspose (+ relu)
+# --------------------------------------------
+def upsample_convtranspose(in_channels=64, out_channels=3, kernel_size=2, stride=2, padding=0, bias=True, mode='2R', negative_slope=0.2):
+    assert len(mode)<4 and mode[0] in ['2', '3', '4'], 'mode examples: 2, 2R, 2BR, 3, ..., 4BR.'
+    kernel_size = int(mode[0])
+    stride = int(mode[0])
+    mode = mode.replace(mode[0], 'T')
+    up1 = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode, negative_slope)
+    return up1
+'''
+# --------------------------------------------
+# Downsampler
+# Kai Zhang, https://github.com/cszn/KAIR
+# --------------------------------------------
+# downsample_strideconv
+# downsample_maxpool
+# downsample_avgpool
+# --------------------------------------------
+'''
+# --------------------------------------------
+# strideconv (+ relu)
+# --------------------------------------------
+def downsample_strideconv(in_channels=64, out_channels=64, kernel_size=2, stride=2, padding=0, bias=True, mode='2R', negative_slope=0.2):
+    assert len(mode)<4 and mode[0] in ['2', '3', '4'], 'mode examples: 2, 2R, 2BR, 3, ..., 4BR.'
+    kernel_size = int(mode[0])
+    stride = int(mode[0])
+    mode = mode.replace(mode[0], 'C')
+    down1 = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode, negative_slope)
+    return down1
+# --------------------------------------------
+# maxpooling + conv (+ relu)
+# --------------------------------------------
+def downsample_maxpool(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=0, bias=True, mode='2R', negative_slope=0.2):
+    assert len(mode)<4 and mode[0] in ['2', '3'], 'mode examples: 2, 2R, 2BR, 3, ..., 3BR.'
+    kernel_size_pool = int(mode[0])
+    stride_pool = int(mode[0])
+    mode = mode.replace(mode[0], 'MC')
+    pool = conv(kernel_size=kernel_size_pool, stride=stride_pool, mode=mode[0], negative_slope=negative_slope)
+    pool_tail = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode=mode[1:], negative_slope=negative_slope)
+    return sequential(pool, pool_tail)
+# --------------------------------------------
+# averagepooling + conv (+ relu)
+# --------------------------------------------
+def downsample_avgpool(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True, mode='2R', negative_slope=0.2):
+    assert len(mode)<4 and mode[0] in ['2', '3'], 'mode examples: 2, 2R, 2BR, 3, ..., 3BR.'
+    kernel_size_pool = int(mode[0])
+    stride_pool = int(mode[0])
+    mode = mode.replace(mode[0], 'AC')
+    pool = conv(kernel_size=kernel_size_pool, stride=stride_pool, mode=mode[0], negative_slope=negative_slope)
+    pool_tail = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode=mode[1:], negative_slope=negative_slope)
+    return sequential(pool, pool_tail)
+class QFAttention(nn.Module):
+    def __init__(self, in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True, mode='CRC', negative_slope=0.2):
+        super(QFAttention, self).__init__()
+        assert in_channels == out_channels, 'Only support in_channels==out_channels.'
+        if mode[0] in ['R', 'L']:
+            mode = mode[0].lower() + mode[1:]
+        self.res = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode, negative_slope)
+    def forward(self, x, gamma, beta):
+        gamma = gamma.unsqueeze(-1).unsqueeze(-1)
+        beta = beta.unsqueeze(-1).unsqueeze(-1)
+        res = (gamma)*self.res(x) + beta
+        return x + res
+class FBCNN(nn.Module):
+    def __init__(self, in_nc=3, out_nc=3, nc=[64, 128, 256, 512], nb=4, act_mode='R', downsample_mode='strideconv',
+                 upsample_mode='convtranspose'):
+        super(FBCNN, self).__init__()
+        self.m_head = conv(in_nc, nc[0], bias=True, mode='C')
+        self.nb = nb
+        self.nc = nc
+        # downsample
+        if downsample_mode == 'avgpool':
+            downsample_block = downsample_avgpool
+        elif downsample_mode == 'maxpool':
+            downsample_block = downsample_maxpool
+        elif downsample_mode == 'strideconv':
+            downsample_block = downsample_strideconv
+        else:
+            raise NotImplementedError('downsample mode [{:s}] is not found'.format(downsample_mode))
+        self.m_down1 = sequential(
+            *[ResBlock(nc[0], nc[0], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)],
+            downsample_block(nc[0], nc[1], bias=True, mode='2'))
+        self.m_down2 = sequential(
+            *[ResBlock(nc[1], nc[1], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)],
+            downsample_block(nc[1], nc[2], bias=True, mode='2'))
+        self.m_down3 = sequential(
+            *[ResBlock(nc[2], nc[2], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)],
+            downsample_block(nc[2], nc[3], bias=True, mode='2'))
+        self.m_body_encoder = sequential(
+            *[ResBlock(nc[3], nc[3], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)])
+        self.m_body_decoder = sequential(
+            *[ResBlock(nc[3], nc[3], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)])
+        # upsample
+        if upsample_mode == 'upconv':
+            upsample_block = upsample_upconv
+        elif upsample_mode == 'pixelshuffle':
+            upsample_block = upsample_pixelshuffle
+        elif upsample_mode == 'convtranspose':
+            upsample_block = upsample_convtranspose
+        else:
+            raise NotImplementedError('upsample mode [{:s}] is not found'.format(upsample_mode))
+        self.m_up3 = nn.ModuleList([upsample_block(nc[3], nc[2], bias=True, mode='2'),
+                                  *[QFAttention(nc[2], nc[2], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)]])
+        self.m_up2 = nn.ModuleList([upsample_block(nc[2], nc[1], bias=True, mode='2'),
+                                  *[QFAttention(nc[1], nc[1], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)]])
+        self.m_up1 = nn.ModuleList([upsample_block(nc[1], nc[0], bias=True, mode='2'),
+                                  *[QFAttention(nc[0], nc[0], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)]])
+        self.m_tail = conv(nc[0], out_nc, bias=True, mode='C')
+        self.qf_pred = sequential(*[ResBlock(nc[3], nc[3], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)],
+                                  torch.nn.AdaptiveAvgPool2d((1,1)),
+                                  torch.nn.Flatten(),
+                                  torch.nn.Linear(512, 512),
+                                  nn.ReLU(),
+                                  torch.nn.Linear(512, 512),
+                                  nn.ReLU(),
+                                  torch.nn.Linear(512, 1),
+                                  nn.Sigmoid()
+                                )
+        self.qf_embed = sequential(torch.nn.Linear(1, 512),
+                                  nn.ReLU(),
+                                  torch.nn.Linear(512, 512),
+                                  nn.ReLU(),
+                                  torch.nn.Linear(512, 512),
+                                  nn.ReLU()
+                                )
+        self.to_gamma_3 = sequential(torch.nn.Linear(512, nc[2]),nn.Sigmoid())
+        self.to_beta_3 =  sequential(torch.nn.Linear(512, nc[2]),nn.Tanh())
+        self.to_gamma_2 = sequential(torch.nn.Linear(512, nc[1]),nn.Sigmoid())
+        self.to_beta_2 =  sequential(torch.nn.Linear(512, nc[1]),nn.Tanh())
+        self.to_gamma_1 = sequential(torch.nn.Linear(512, nc[0]),nn.Sigmoid())
+        self.to_beta_1 =  sequential(torch.nn.Linear(512, nc[0]),nn.Tanh())
+    def forward(self, x, qf_input=None):
+        h, w = x.size()[-2:]
+        paddingBottom = int(np.ceil(h / 8) * 8 - h)
+        paddingRight = int(np.ceil(w / 8) * 8 - w)
+        x = nn.ReplicationPad2d((0, paddingRight, 0, paddingBottom))(x)
+        x1 = self.m_head(x)
+        x2 = self.m_down1(x1)
+        x3 = self.m_down2(x2)
+        x4 = self.m_down3(x3)
+        x = self.m_body_encoder(x4)
+        qf = self.qf_pred(x)
+        x = self.m_body_decoder(x)
+        qf_embedding = self.qf_embed(qf_input) if qf_input is not None else self.qf_embed(qf)
+        gamma_3 = self.to_gamma_3(qf_embedding)
+        beta_3 = self.to_beta_3(qf_embedding)
+        gamma_2 = self.to_gamma_2(qf_embedding)
+        beta_2 = self.to_beta_2(qf_embedding)
+        gamma_1 = self.to_gamma_1(qf_embedding)
+        beta_1 = self.to_beta_1(qf_embedding)
+        x = x + x4
+        x = self.m_up3[0](x)
+        for i in range(self.nb):
+            x = self.m_up3[i+1](x, gamma_3,beta_3)
+        x = x + x3
+        x = self.m_up2[0](x)
+        for i in range(self.nb):
+            x = self.m_up2[i+1](x, gamma_2, beta_2)
+        x = x + x2
+        x = self.m_up1[0](x)
+        for i in range(self.nb):
+            x = self.m_up1[i+1](x, gamma_1, beta_1)
+        x = x + x1
+        x = self.m_tail(x)
+        x = x[..., :h, :w]
+        return x, qf
+if __name__ == "__main__":
+    x = torch.randn(1, 3, 96, 96)#.cuda()#.to(torch.device('cuda'))
+    fbar=FBAR()
+    y,qf = fbar(x)
+    print(y.shape,qf.shape)

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ python3-opencv

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+torch
+opencv-python
+torchvision

utils_image.py ADDED Viewed

	@@ -0,0 +1,999 @@

+import os
+import math
+import random
+import numpy as np
+import torch
+import cv2
+from torchvision.utils import make_grid
+from datetime import datetime
+# import torchvision.transforms as transforms
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+'''
+# --------------------------------------------
+# Kai Zhang (github: https://github.com/cszn)
+# 03/Mar/2019
+# --------------------------------------------
+# https://github.com/twhui/SRGAN-pyTorch
+# https://github.com/xinntao/BasicSR
+# --------------------------------------------
+'''
+IMG_EXTENSIONS = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', '.tif']
+def is_image_file(filename):
+    return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)
+def get_timestamp():
+    return datetime.now().strftime('%y%m%d-%H%M%S')
+def imshow(x, title=None, cbar=False, figsize=None):
+    plt.figure(figsize=figsize)
+    plt.imshow(np.squeeze(x), interpolation='nearest', cmap='gray')
+    if title:
+        plt.title(title)
+    if cbar:
+        plt.colorbar()
+    plt.show()
+def surf(Z, cmap='rainbow', figsize=None):
+    plt.figure(figsize=figsize)
+    ax3 = plt.axes(projection='3d')
+    w, h = Z.shape[:2]
+    xx = np.arange(0,w,1)
+    yy = np.arange(0,h,1)
+    X, Y = np.meshgrid(xx, yy)
+    ax3.plot_surface(X,Y,Z,cmap=cmap)
+    #ax3.contour(X,Y,Z, zdim='z',offset=-2，cmap=cmap)
+    plt.show()
+'''
+# --------------------------------------------
+# get image pathes
+# --------------------------------------------
+'''
+def get_image_paths(dataroot):
+    paths = None  # return None if dataroot is None
+    if dataroot is not None:
+        paths = sorted(_get_paths_from_images(dataroot))
+    return paths
+def _get_paths_from_images(path):
+    assert os.path.isdir(path), '{:s} is not a valid directory'.format(path)
+    images = []
+    for dirpath, _, fnames in sorted(os.walk(path)):
+        for fname in sorted(fnames):
+            if is_image_file(fname):
+                img_path = os.path.join(dirpath, fname)
+                images.append(img_path)
+    assert images, '{:s} has no valid image file'.format(path)
+    return images
+'''
+# --------------------------------------------
+# split large images into small images
+# --------------------------------------------
+'''
+def patches_from_image(img, p_size=512, p_overlap=64, p_max=800):
+    w, h = img.shape[:2]
+    patches = []
+    if w > p_max and h > p_max:
+        w1 = list(np.arange(0, w-p_size, p_size-p_overlap, dtype=np.int))
+        h1 = list(np.arange(0, h-p_size, p_size-p_overlap, dtype=np.int))
+        w1.append(w-p_size)
+        h1.append(h-p_size)
+        # print(w1)
+        # print(h1)
+        for i in w1:
+            for j in h1:
+                patches.append(img[i:i+p_size, j:j+p_size,:])
+    else:
+        patches.append(img)
+    return patches
+def imssave(imgs, img_path):
+    """
+    imgs: list, N images of size WxHxC
+    """
+    img_name, ext = os.path.splitext(os.path.basename(img_path))
+    for i, img in enumerate(imgs):
+        if img.ndim == 3:
+            img = img[:, :, [2, 1, 0]]
+        new_path = os.path.join(os.path.dirname(img_path), img_name+str('_{:04d}'.format(i))+'.png')
+        cv2.imwrite(new_path, img)
+def split_imageset(original_dataroot, taget_dataroot, n_channels=3, p_size=512, p_overlap=96, p_max=800):
+    """
+    split the large images from original_dataroot into small overlapped images with size (p_size)x(p_size),
+    and save them into taget_dataroot; only the images with larger size than (p_max)x(p_max)
+    will be splitted.
+    Args:
+        original_dataroot:
+        taget_dataroot:
+        p_size: size of small images
+        p_overlap: patch size in training is a good choice
+        p_max: images with smaller size than (p_max)x(p_max) keep unchanged.
+    """
+    paths = get_image_paths(original_dataroot)
+    for img_path in paths:
+        # img_name, ext = os.path.splitext(os.path.basename(img_path))
+        img = imread_uint(img_path, n_channels=n_channels)
+        patches = patches_from_image(img, p_size, p_overlap, p_max)
+        imssave(patches, os.path.join(taget_dataroot, os.path.basename(img_path)))
+        #if original_dataroot == taget_dataroot:
+        #del img_path
+'''
+# --------------------------------------------
+# makedir
+# --------------------------------------------
+'''
+def mkdir(path):
+    if not os.path.exists(path):
+        os.makedirs(path)
+def mkdirs(paths):
+    if isinstance(paths, str):
+        mkdir(paths)
+    else:
+        for path in paths:
+            mkdir(path)
+def mkdir_and_rename(path):
+    if os.path.exists(path):
+        new_name = path + '_archived_' + get_timestamp()
+        print('Path already exists. Rename it to [{:s}]'.format(new_name))
+        os.rename(path, new_name)
+    os.makedirs(path)
+'''
+# --------------------------------------------
+# read image from path
+# opencv is fast, but read BGR numpy image
+# --------------------------------------------
+'''
+# --------------------------------------------
+# get uint8 image of size HxWxn_channles (RGB)
+# --------------------------------------------
+def imread_uint(path, n_channels=3):
+    #  input: path
+    # output: HxWx3(RGB or GGG), or HxWx1 (G)
+    if n_channels == 1:
+        img = cv2.imread(path, 0)  # cv2.IMREAD_GRAYSCALE
+        img = np.expand_dims(img, axis=2)  # HxWx1
+    elif n_channels == 3:
+        img = cv2.imread(path, cv2.IMREAD_UNCHANGED)  # BGR or G
+        if img.ndim == 2:
+            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)  # GGG
+        else:
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # RGB
+    return img
+# --------------------------------------------
+# matlab's imwrite
+# --------------------------------------------
+def imsave(img, img_path):
+    img = np.squeeze(img)
+    if img.ndim == 3:
+        img = img[:, :, [2, 1, 0]]
+    cv2.imwrite(img_path, img)
+def imwrite(img, img_path):
+    img = np.squeeze(img)
+    if img.ndim == 3:
+        img = img[:, :, [2, 1, 0]]
+    cv2.imwrite(img_path, img)
+# --------------------------------------------
+# get single image of size HxWxn_channles (BGR)
+# --------------------------------------------
+def read_img(path):
+    # read image by cv2
+    # return: Numpy float32, HWC, BGR, [0,1]
+    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)  # cv2.IMREAD_GRAYSCALE
+    img = img.astype(np.float32) / 255.
+    if img.ndim == 2:
+        img = np.expand_dims(img, axis=2)
+    # some images have 4 channels
+    if img.shape[2] > 3:
+        img = img[:, :, :3]
+    return img
+'''
+# --------------------------------------------
+# image format conversion
+# --------------------------------------------
+# numpy(single) <--->  numpy(unit)
+# numpy(single) <--->  tensor
+# numpy(unit)   <--->  tensor
+# --------------------------------------------
+'''
+# --------------------------------------------
+# numpy(single) [0, 1] <--->  numpy(unit)
+# --------------------------------------------
+def uint2single(img):
+    return np.float32(img/255.)
+def single2uint(img):
+    return np.uint8((img.clip(0, 1)*255.).round())
+def uint162single(img):
+    return np.float32(img/65535.)
+def single2uint16(img):
+    return np.uint16((img.clip(0, 1)*65535.).round())
+# --------------------------------------------
+# numpy(unit) (HxWxC or HxW) <--->  tensor
+# --------------------------------------------
+# convert uint to 4-dimensional torch tensor
+def uint2tensor4(img):
+    if img.ndim == 2:
+        img = np.expand_dims(img, axis=2)
+    return torch.from_numpy(np.ascontiguousarray(img)).permute(2, 0, 1).float().div(255.).unsqueeze(0)
+# convert uint to 3-dimensional torch tensor
+def uint2tensor3(img):
+    if img.ndim == 2:
+        img = np.expand_dims(img, axis=2)
+    return torch.from_numpy(np.ascontiguousarray(img)).permute(2, 0, 1).float().div(255.)
+# convert 2/3/4-dimensional torch tensor to uint
+def tensor2uint(img):
+    img = img.data.squeeze().float().clamp_(0, 1).cpu().numpy()
+    if img.ndim == 3:
+        img = np.transpose(img, (1, 2, 0))
+    return np.uint8((img*255.0).round())
+# --------------------------------------------
+# numpy(single) (HxWxC) <--->  tensor
+# --------------------------------------------
+# convert single (HxWxC) to 3-dimensional torch tensor
+def single2tensor3(img):
+    return torch.from_numpy(np.ascontiguousarray(img)).permute(2, 0, 1).float()
+# convert single (HxWxC) to 4-dimensional torch tensor
+def single2tensor4(img):
+    return torch.from_numpy(np.ascontiguousarray(img)).permute(2, 0, 1).float().unsqueeze(0)
+# convert torch tensor to single
+def tensor2single(img):
+    img = img.data.squeeze().float().cpu().numpy()
+    if img.ndim == 3:
+        img = np.transpose(img, (1, 2, 0))
+    return img
+# convert torch tensor to single
+def tensor2single3(img):
+    img = img.data.squeeze().float().cpu().numpy()
+    if img.ndim == 3:
+        img = np.transpose(img, (1, 2, 0))
+    elif img.ndim == 2:
+        img = np.expand_dims(img, axis=2)
+    return img
+def single2tensor5(img):
+    return torch.from_numpy(np.ascontiguousarray(img)).permute(2, 0, 1, 3).float().unsqueeze(0)
+def single32tensor5(img):
+    return torch.from_numpy(np.ascontiguousarray(img)).float().unsqueeze(0).unsqueeze(0)
+def single42tensor4(img):
+    return torch.from_numpy(np.ascontiguousarray(img)).permute(2, 0, 1, 3).float()
+# from skimage.io import imread, imsave
+def tensor2img(tensor, out_type=np.uint8, min_max=(0, 1)):
+    '''
+    Converts a torch Tensor into an image Numpy array of BGR channel order
+    Input: 4D(B,(3/1),H,W), 3D(C,H,W), or 2D(H,W), any range, RGB channel order
+    Output: 3D(H,W,C) or 2D(H,W), [0,255], np.uint8 (default)
+    '''
+    tensor = tensor.squeeze().float().cpu().clamp_(*min_max)  # squeeze first, then clamp
+    tensor = (tensor - min_max[0]) / (min_max[1] - min_max[0])  # to range [0,1]
+    n_dim = tensor.dim()
+    if n_dim == 4:
+        n_img = len(tensor)
+        img_np = make_grid(tensor, nrow=int(math.sqrt(n_img)), normalize=False).numpy()
+        img_np = np.transpose(img_np[[2, 1, 0], :, :], (1, 2, 0))  # HWC, BGR
+    elif n_dim == 3:
+        img_np = tensor.numpy()
+        img_np = np.transpose(img_np[[2, 1, 0], :, :], (1, 2, 0))  # HWC, BGR
+    elif n_dim == 2:
+        img_np = tensor.numpy()
+    else:
+        raise TypeError(
+            'Only support 4D, 3D and 2D tensor. But received with dimension: {:d}'.format(n_dim))
+    if out_type == np.uint8:
+        img_np = (img_np * 255.0).round()
+        # Important. Unlike matlab, numpy.unit8() WILL NOT round by default.
+    return img_np.astype(out_type)
+'''
+# --------------------------------------------
+# Augmentation, flipe and/or rotate
+# --------------------------------------------
+# The following two are enough.
+# (1) augmet_img: numpy image of WxHxC or WxH
+# (2) augment_img_tensor4: tensor image 1xCxWxH
+# --------------------------------------------
+'''
+def augment_img(img, mode=0):
+    '''Kai Zhang (github: https://github.com/cszn)
+    '''
+    if mode == 0:
+        return img
+    elif mode == 1:
+        return np.flipud(np.rot90(img))
+    elif mode == 2:
+        return np.flipud(img)
+    elif mode == 3:
+        return np.rot90(img, k=3)
+    elif mode == 4:
+        return np.flipud(np.rot90(img, k=2))
+    elif mode == 5:
+        return np.rot90(img)
+    elif mode == 6:
+        return np.rot90(img, k=2)
+    elif mode == 7:
+        return np.flipud(np.rot90(img, k=3))
+def augment_img_tensor4(img, mode=0):
+    '''Kai Zhang (github: https://github.com/cszn)
+    '''
+    if mode == 0:
+        return img
+    elif mode == 1:
+        return img.rot90(1, [2, 3]).flip([2])
+    elif mode == 2:
+        return img.flip([2])
+    elif mode == 3:
+        return img.rot90(3, [2, 3])
+    elif mode == 4:
+        return img.rot90(2, [2, 3]).flip([2])
+    elif mode == 5:
+        return img.rot90(1, [2, 3])
+    elif mode == 6:
+        return img.rot90(2, [2, 3])
+    elif mode == 7:
+        return img.rot90(3, [2, 3]).flip([2])
+def augment_img_tensor(img, mode=0):
+    '''Kai Zhang (github: https://github.com/cszn)
+    '''
+    img_size = img.size()
+    img_np = img.data.cpu().numpy()
+    if len(img_size) == 3:
+        img_np = np.transpose(img_np, (1, 2, 0))
+    elif len(img_size) == 4:
+        img_np = np.transpose(img_np, (2, 3, 1, 0))
+    img_np = augment_img(img_np, mode=mode)
+    img_tensor = torch.from_numpy(np.ascontiguousarray(img_np))
+    if len(img_size) == 3:
+        img_tensor = img_tensor.permute(2, 0, 1)
+    elif len(img_size) == 4:
+        img_tensor = img_tensor.permute(3, 2, 0, 1)
+    return img_tensor.type_as(img)
+def augment_img_np3(img, mode=0):
+    if mode == 0:
+        return img
+    elif mode == 1:
+        return img.transpose(1, 0, 2)
+    elif mode == 2:
+        return img[::-1, :, :]
+    elif mode == 3:
+        img = img[::-1, :, :]
+        img = img.transpose(1, 0, 2)
+        return img
+    elif mode == 4:
+        return img[:, ::-1, :]
+    elif mode == 5:
+        img = img[:, ::-1, :]
+        img = img.transpose(1, 0, 2)
+        return img
+    elif mode == 6:
+        img = img[:, ::-1, :]
+        img = img[::-1, :, :]
+        return img
+    elif mode == 7:
+        img = img[:, ::-1, :]
+        img = img[::-1, :, :]
+        img = img.transpose(1, 0, 2)
+        return img
+def augment_imgs(img_list, hflip=True, rot=True):
+    # horizontal flip OR rotate
+    hflip = hflip and random.random() < 0.5
+    vflip = rot and random.random() < 0.5
+    rot90 = rot and random.random() < 0.5
+    def _augment(img):
+        if hflip:
+            img = img[:, ::-1, :]
+        if vflip:
+            img = img[::-1, :, :]
+        if rot90:
+            img = img.transpose(1, 0, 2)
+        return img
+    return [_augment(img) for img in img_list]
+'''
+# --------------------------------------------
+# modcrop and shave
+# --------------------------------------------
+'''
+def modcrop(img_in, scale):
+    # img_in: Numpy, HWC or HW
+    img = np.copy(img_in)
+    if img.ndim == 2:
+        H, W = img.shape
+        H_r, W_r = H % scale, W % scale
+        img = img[:H - H_r, :W - W_r]
+    elif img.ndim == 3:
+        H, W, C = img.shape
+        H_r, W_r = H % scale, W % scale
+        img = img[:H - H_r, :W - W_r, :]
+    else:
+        raise ValueError('Wrong img ndim: [{:d}].'.format(img.ndim))
+    return img
+def shave(img_in, border=0):
+    # img_in: Numpy, HWC or HW
+    img = np.copy(img_in)
+    h, w = img.shape[:2]
+    img = img[border:h-border, border:w-border]
+    return img
+'''
+# --------------------------------------------
+# image processing process on numpy image
+# channel_convert(in_c, tar_type, img_list):
+# rgb2ycbcr(img, only_y=True):
+# bgr2ycbcr(img, only_y=True):
+# ycbcr2rgb(img):
+# --------------------------------------------
+'''
+def rgb2ycbcr(img, only_y=True):
+    '''same as matlab rgb2ycbcr
+    only_y: only return Y channel
+    Input:
+        uint8, [0, 255]
+        float, [0, 1]
+    '''
+    in_img_type = img.dtype
+    img.astype(np.float32)
+    if in_img_type != np.uint8:
+        img *= 255.
+    # convert
+    if only_y:
+        rlt = np.dot(img, [65.481, 128.553, 24.966]) / 255.0 + 16.0
+    else:
+        rlt = np.matmul(img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786],
+                              [24.966, 112.0, -18.214]]) / 255.0 + [16, 128, 128]
+    if in_img_type == np.uint8:
+        rlt = rlt.round()
+    else:
+        rlt /= 255.
+    return rlt.astype(in_img_type)
+def ycbcr2rgb(img):
+    '''same as matlab ycbcr2rgb
+    Input:
+        uint8, [0, 255]
+        float, [0, 1]
+    '''
+    in_img_type = img.dtype
+    img.astype(np.float32)
+    if in_img_type != np.uint8:
+        img *= 255.
+    # convert
+    rlt = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621], [0, -0.00153632, 0.00791071],
+                          [0.00625893, -0.00318811, 0]]) * 255.0 + [-222.921, 135.576, -276.836]
+    if in_img_type == np.uint8:
+        rlt = rlt.round()
+    else:
+        rlt /= 255.
+    return rlt.astype(in_img_type)
+def bgr2ycbcr(img, only_y=True):
+    '''bgr version of rgb2ycbcr
+    only_y: only return Y channel
+    Input:
+        uint8, [0, 255]
+        float, [0, 1]
+    '''
+    in_img_type = img.dtype
+    img.astype(np.float32)
+    if in_img_type != np.uint8:
+        img *= 255.
+    # convert
+    if only_y:
+        rlt = np.dot(img, [24.966, 128.553, 65.481]) / 255.0 + 16.0
+    else:
+        rlt = np.matmul(img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786],
+                              [65.481, -37.797, 112.0]]) / 255.0 + [16, 128, 128]
+    if in_img_type == np.uint8:
+        rlt = rlt.round()
+    else:
+        rlt /= 255.
+    return rlt.astype(in_img_type)
+def channel_convert(in_c, tar_type, img_list):
+    # conversion among BGR, gray and y
+    if in_c == 3 and tar_type == 'gray':  # BGR to gray
+        gray_list = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in img_list]
+        return [np.expand_dims(img, axis=2) for img in gray_list]
+    elif in_c == 3 and tar_type == 'y':  # BGR to y
+        y_list = [bgr2ycbcr(img, only_y=True) for img in img_list]
+        return [np.expand_dims(img, axis=2) for img in y_list]
+    elif in_c == 1 and tar_type == 'RGB':  # gray/y to BGR
+        return [cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) for img in img_list]
+    else:
+        return img_list
+'''
+# --------------------------------------------
+# metric, PSNR and SSIM
+# --------------------------------------------
+'''
+# --------------------------------------------
+# PSNR
+# --------------------------------------------
+def calculate_psnr(img1, img2, border=0):
+    # img1 and img2 have range [0, 255]
+    #img1 = img1.squeeze()
+    #img2 = img2.squeeze()
+    if not img1.shape == img2.shape:
+        raise ValueError('Input images must have the same dimensions.')
+    h, w = img1.shape[:2]
+    img1 = img1[border:h-border, border:w-border]
+    img2 = img2[border:h-border, border:w-border]
+    img1 = img1.astype(np.float64)
+    img2 = img2.astype(np.float64)
+    mse = np.mean((img1 - img2)**2)
+    if mse == 0:
+        return float('inf')
+    return 20 * math.log10(255.0 / math.sqrt(mse))
+# --------------------------------------------
+# BEF: Blocking effect factor
+# --------------------------------------------
+def compute_bef(img):
+	block = 8
+	height, width = img.shape[:2]
+	H = [i for i in range(width-1)]
+	H_B = [i for i in range(block-1,width-1,block)]
+	H_BC = list(set(H)-set(H_B))
+	V = [i for i in range(height-1)]
+	V_B = [i for i in range(block-1,height-1,block)]
+	V_BC = list(set(V)-set(V_B))
+	D_B = 0
+	D_BC = 0
+	for i in H_B:
+		diff = img[:,i] - img[:,i+1]
+		D_B += np.sum(diff**2)
+	for i in H_BC:
+		diff = img[:,i] - img[:,i+1]
+		D_BC += np.sum(diff**2)
+	for j in V_B:
+		diff = img[j,:] - img[j+1,:]
+		D_B += np.sum(diff**2)
+	for j in V_BC:
+		diff = img[j,:] - img[j+1,:]
+		D_BC += np.sum(diff**2)
+	N_HB = height * (width/block - 1)
+	N_HBC = height * (width - 1) - N_HB
+	N_VB = width * (height/block -1)
+	N_VBC = width * (height -1) - N_VB
+	D_B = D_B / (N_HB + N_VB)
+	D_BC = D_BC / (N_HBC + N_VBC)
+	eta = math.log2(block) / math.log2(min(height, width)) if D_B > D_BC else 0
+	return eta * (D_B - D_BC)
+# --------------------------------------------
+# PSNRB
+# --------------------------------------------
+def calculate_psnrb(img1, img2, border=0):
+	# img1: ground truth
+	# img2: compressed image
+    # img1 and img2 have range [0, 255]
+    #img1 = img1.squeeze()
+    #img2 = img2.squeeze()
+    if not img1.shape == img2.shape:
+        raise ValueError('Input images must have the same dimensions.')
+    h, w = img1.shape[:2]
+    img1 = img1[border:h-border, border:w-border]
+    img2 = img2[border:h-border, border:w-border]
+    img1 = img1.astype(np.float64)
+    if img2.shape[-1]==3:
+        img2_y = rgb2ycbcr(img2).astype(np.float64)
+        bef = compute_bef(img2_y)
+    else:
+        img2 = img2.astype(np.float64)
+        bef = compute_bef(img2)
+    mse = np.mean((img1 - img2)**2)
+    mse_b = mse + bef
+    if mse_b == 0:
+        return float('inf')
+    return 20 * math.log10(255.0 / math.sqrt(mse_b))
+# --------------------------------------------
+# SSIM
+# --------------------------------------------
+def calculate_ssim(img1, img2, border=0):
+    '''calculate SSIM
+    the same outputs as MATLAB's
+    img1, img2: [0, 255]
+    '''
+    #img1 = img1.squeeze()
+    #img2 = img2.squeeze()
+    if not img1.shape == img2.shape:
+        raise ValueError('Input images must have the same dimensions.')
+    h, w = img1.shape[:2]
+    img1 = img1[border:h-border, border:w-border]
+    img2 = img2[border:h-border, border:w-border]
+    if img1.ndim == 2:
+        return ssim(img1, img2)
+    elif img1.ndim == 3:
+        if img1.shape[2] == 3:
+            ssims = []
+            for i in range(3):
+                ssims.append(ssim(img1[:,:,i], img2[:,:,i]))
+            return np.array(ssims).mean()
+        elif img1.shape[2] == 1:
+            return ssim(np.squeeze(img1), np.squeeze(img2))
+    else:
+        raise ValueError('Wrong input image dimensions.')
+def ssim(img1, img2):
+    C1 = (0.01 * 255)**2
+    C2 = (0.03 * 255)**2
+    img1 = img1.astype(np.float64)
+    img2 = img2.astype(np.float64)
+    kernel = cv2.getGaussianKernel(11, 1.5)
+    window = np.outer(kernel, kernel.transpose())
+    mu1 = cv2.filter2D(img1, -1, window)[5:-5, 5:-5]  # valid
+    mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5]
+    mu1_sq = mu1**2
+    mu2_sq = mu2**2
+    mu1_mu2 = mu1 * mu2
+    sigma1_sq = cv2.filter2D(img1**2, -1, window)[5:-5, 5:-5] - mu1_sq
+    sigma2_sq = cv2.filter2D(img2**2, -1, window)[5:-5, 5:-5] - mu2_sq
+    sigma12 = cv2.filter2D(img1 * img2, -1, window)[5:-5, 5:-5] - mu1_mu2
+    ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) *
+                                                            (sigma1_sq + sigma2_sq + C2))
+    return ssim_map.mean()
+'''
+# --------------------------------------------
+# matlab's bicubic imresize (numpy and torch) [0, 1]
+# --------------------------------------------
+'''
+# matlab 'imresize' function, now only support 'bicubic'
+def cubic(x):
+    absx = torch.abs(x)
+    absx2 = absx**2
+    absx3 = absx**3
+    return (1.5*absx3 - 2.5*absx2 + 1) * ((absx <= 1).type_as(absx)) + \
+        (-0.5*absx3 + 2.5*absx2 - 4*absx + 2) * (((absx > 1)*(absx <= 2)).type_as(absx))
+def calculate_weights_indices(in_length, out_length, scale, kernel, kernel_width, antialiasing):
+    if (scale < 1) and (antialiasing):
+        # Use a modified kernel to simultaneously interpolate and antialias- larger kernel width
+        kernel_width = kernel_width / scale
+    # Output-space coordinates
+    x = torch.linspace(1, out_length, out_length)
+    # Input-space coordinates. Calculate the inverse mapping such that 0.5
+    # in output space maps to 0.5 in input space, and 0.5+scale in output
+    # space maps to 1.5 in input space.
+    u = x / scale + 0.5 * (1 - 1 / scale)
+    # What is the left-most pixel that can be involved in the computation?
+    left = torch.floor(u - kernel_width / 2)
+    # What is the maximum number of pixels that can be involved in the
+    # computation?  Note: it's OK to use an extra pixel here; if the
+    # corresponding weights are all zero, it will be eliminated at the end
+    # of this function.
+    P = math.ceil(kernel_width) + 2
+    # The indices of the input pixels involved in computing the k-th output
+    # pixel are in row k of the indices matrix.
+    indices = left.view(out_length, 1).expand(out_length, P) + torch.linspace(0, P - 1, P).view(
+        1, P).expand(out_length, P)
+    # The weights used to compute the k-th output pixel are in row k of the
+    # weights matrix.
+    distance_to_center = u.view(out_length, 1).expand(out_length, P) - indices
+    # apply cubic kernel
+    if (scale < 1) and (antialiasing):
+        weights = scale * cubic(distance_to_center * scale)
+    else:
+        weights = cubic(distance_to_center)
+    # Normalize the weights matrix so that each row sums to 1.
+    weights_sum = torch.sum(weights, 1).view(out_length, 1)
+    weights = weights / weights_sum.expand(out_length, P)
+    # If a column in weights is all zero, get rid of it. only consider the first and last column.
+    weights_zero_tmp = torch.sum((weights == 0), 0)
+    if not math.isclose(weights_zero_tmp[0], 0, rel_tol=1e-6):
+        indices = indices.narrow(1, 1, P - 2)
+        weights = weights.narrow(1, 1, P - 2)
+    if not math.isclose(weights_zero_tmp[-1], 0, rel_tol=1e-6):
+        indices = indices.narrow(1, 0, P - 2)
+        weights = weights.narrow(1, 0, P - 2)
+    weights = weights.contiguous()
+    indices = indices.contiguous()
+    sym_len_s = -indices.min() + 1
+    sym_len_e = indices.max() - in_length
+    indices = indices + sym_len_s - 1
+    return weights, indices, int(sym_len_s), int(sym_len_e)
+# --------------------------------------------
+# imresize for tensor image [0, 1]
+# --------------------------------------------
+def imresize(img, scale, antialiasing=True):
+    # Now the scale should be the same for H and W
+    # input: img: pytorch tensor, CHW or HW [0,1]
+    # output: CHW or HW [0,1] w/o round
+    need_squeeze = True if img.dim() == 2 else False
+    if need_squeeze:
+        img.unsqueeze_(0)
+    in_C, in_H, in_W = img.size()
+    out_C, out_H, out_W = in_C, math.ceil(in_H * scale), math.ceil(in_W * scale)
+    kernel_width = 4
+    kernel = 'cubic'
+    # Return the desired dimension order for performing the resize.  The
+    # strategy is to perform the resize first along the dimension with the
+    # smallest scale factor.
+    # Now we do not support this.
+    # get weights and indices
+    weights_H, indices_H, sym_len_Hs, sym_len_He = calculate_weights_indices(
+        in_H, out_H, scale, kernel, kernel_width, antialiasing)
+    weights_W, indices_W, sym_len_Ws, sym_len_We = calculate_weights_indices(
+        in_W, out_W, scale, kernel, kernel_width, antialiasing)
+    # process H dimension
+    # symmetric copying
+    img_aug = torch.FloatTensor(in_C, in_H + sym_len_Hs + sym_len_He, in_W)
+    img_aug.narrow(1, sym_len_Hs, in_H).copy_(img)
+    sym_patch = img[:, :sym_len_Hs, :]
+    inv_idx = torch.arange(sym_patch.size(1) - 1, -1, -1).long()
+    sym_patch_inv = sym_patch.index_select(1, inv_idx)
+    img_aug.narrow(1, 0, sym_len_Hs).copy_(sym_patch_inv)
+    sym_patch = img[:, -sym_len_He:, :]
+    inv_idx = torch.arange(sym_patch.size(1) - 1, -1, -1).long()
+    sym_patch_inv = sym_patch.index_select(1, inv_idx)
+    img_aug.narrow(1, sym_len_Hs + in_H, sym_len_He).copy_(sym_patch_inv)
+    out_1 = torch.FloatTensor(in_C, out_H, in_W)
+    kernel_width = weights_H.size(1)
+    for i in range(out_H):
+        idx = int(indices_H[i][0])
+        for j in range(out_C):
+            out_1[j, i, :] = img_aug[j, idx:idx + kernel_width, :].transpose(0, 1).mv(weights_H[i])
+    # process W dimension
+    # symmetric copying
+    out_1_aug = torch.FloatTensor(in_C, out_H, in_W + sym_len_Ws + sym_len_We)
+    out_1_aug.narrow(2, sym_len_Ws, in_W).copy_(out_1)
+    sym_patch = out_1[:, :, :sym_len_Ws]
+    inv_idx = torch.arange(sym_patch.size(2) - 1, -1, -1).long()
+    sym_patch_inv = sym_patch.index_select(2, inv_idx)
+    out_1_aug.narrow(2, 0, sym_len_Ws).copy_(sym_patch_inv)
+    sym_patch = out_1[:, :, -sym_len_We:]
+    inv_idx = torch.arange(sym_patch.size(2) - 1, -1, -1).long()
+    sym_patch_inv = sym_patch.index_select(2, inv_idx)
+    out_1_aug.narrow(2, sym_len_Ws + in_W, sym_len_We).copy_(sym_patch_inv)
+    out_2 = torch.FloatTensor(in_C, out_H, out_W)
+    kernel_width = weights_W.size(1)
+    for i in range(out_W):
+        idx = int(indices_W[i][0])
+        for j in range(out_C):
+            out_2[j, :, i] = out_1_aug[j, :, idx:idx + kernel_width].mv(weights_W[i])
+    if need_squeeze:
+        out_2.squeeze_()
+    return out_2
+# --------------------------------------------
+# imresize for numpy image [0, 1]
+# --------------------------------------------
+def imresize_np(img, scale, antialiasing=True):
+    # Now the scale should be the same for H and W
+    # input: img: Numpy, HWC or HW [0,1]
+    # output: HWC or HW [0,1] w/o round
+    img = torch.from_numpy(img)
+    need_squeeze = True if img.dim() == 2 else False
+    if need_squeeze:
+        img.unsqueeze_(2)
+    in_H, in_W, in_C = img.size()
+    out_C, out_H, out_W = in_C, math.ceil(in_H * scale), math.ceil(in_W * scale)
+    kernel_width = 4
+    kernel = 'cubic'
+    # Return the desired dimension order for performing the resize.  The
+    # strategy is to perform the resize first along the dimension with the
+    # smallest scale factor.
+    # Now we do not support this.
+    # get weights and indices
+    weights_H, indices_H, sym_len_Hs, sym_len_He = calculate_weights_indices(
+        in_H, out_H, scale, kernel, kernel_width, antialiasing)
+    weights_W, indices_W, sym_len_Ws, sym_len_We = calculate_weights_indices(
+        in_W, out_W, scale, kernel, kernel_width, antialiasing)
+    # process H dimension
+    # symmetric copying
+    img_aug = torch.FloatTensor(in_H + sym_len_Hs + sym_len_He, in_W, in_C)
+    img_aug.narrow(0, sym_len_Hs, in_H).copy_(img)
+    sym_patch = img[:sym_len_Hs, :, :]
+    inv_idx = torch.arange(sym_patch.size(0) - 1, -1, -1).long()
+    sym_patch_inv = sym_patch.index_select(0, inv_idx)
+    img_aug.narrow(0, 0, sym_len_Hs).copy_(sym_patch_inv)
+    sym_patch = img[-sym_len_He:, :, :]
+    inv_idx = torch.arange(sym_patch.size(0) - 1, -1, -1).long()
+    sym_patch_inv = sym_patch.index_select(0, inv_idx)
+    img_aug.narrow(0, sym_len_Hs + in_H, sym_len_He).copy_(sym_patch_inv)
+    out_1 = torch.FloatTensor(out_H, in_W, in_C)
+    kernel_width = weights_H.size(1)
+    for i in range(out_H):
+        idx = int(indices_H[i][0])
+        for j in range(out_C):
+            out_1[i, :, j] = img_aug[idx:idx + kernel_width, :, j].transpose(0, 1).mv(weights_H[i])
+    # process W dimension
+    # symmetric copying
+    out_1_aug = torch.FloatTensor(out_H, in_W + sym_len_Ws + sym_len_We, in_C)
+    out_1_aug.narrow(1, sym_len_Ws, in_W).copy_(out_1)
+    sym_patch = out_1[:, :sym_len_Ws, :]
+    inv_idx = torch.arange(sym_patch.size(1) - 1, -1, -1).long()
+    sym_patch_inv = sym_patch.index_select(1, inv_idx)
+    out_1_aug.narrow(1, 0, sym_len_Ws).copy_(sym_patch_inv)
+    sym_patch = out_1[:, -sym_len_We:, :]
+    inv_idx = torch.arange(sym_patch.size(1) - 1, -1, -1).long()
+    sym_patch_inv = sym_patch.index_select(1, inv_idx)
+    out_1_aug.narrow(1, sym_len_Ws + in_W, sym_len_We).copy_(sym_patch_inv)
+    out_2 = torch.FloatTensor(out_H, out_W, in_C)
+    kernel_width = weights_W.size(1)
+    for i in range(out_W):
+        idx = int(indices_W[i][0])
+        for j in range(out_C):
+            out_2[:, i, j] = out_1_aug[:, idx:idx + kernel_width, j].mv(weights_W[i])
+    if need_squeeze:
+        out_2.squeeze_()
+    return out_2.numpy()
+if __name__ == '__main__':
+    img = imread_uint('test.bmp', 3)
+#    img = uint2single(img)
+#    img_bicubic = imresize_np(img, 1/4)
+#    imshow(single2uint(img_bicubic))
+#
+#    img_tensor = single2tensor4(img)
+#    for i in range(8):
+#        imshow(np.concatenate((augment_img(img, i), tensor2single(augment_img_tensor4(img_tensor, i))), 1))
+#    patches = patches_from_image(img, p_size=128, p_overlap=0, p_max=200)
+#    imssave(patches,'a.png')