SpatialDepthWiseConvolution in torch (lua)
MIT License
Copyright (c) 2018 Pavel Chernov
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-- Copyright (c) 2018 Pavel Chernov
-- This code is distributed under MIT License.
--
-- This module was written by Pavel Chernov in February 2018 ([email protected])
-- I wrote it to verify my understanding of convolution and its gradient calculation.
-- This code is based on the paper by Vincent Dumoulin and Francesco Visin:
-- 'A guide to convolution arithmetic for deep learning' (https://arxiv.org/pdf/1603.07285.pdf)
local torch = require 'torch'
local nn = require 'nn'
local SpatialConvolutionLua, parent = torch.class('nn.SpatialConvolutionLua', 'nn.Module')
function SpatialConvolutionLua:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH)
parent.__init(self)
dW = dW or 1
dH = dH or 1
self.nInputPlane = nInputPlane
self.nOutputPlane = nOutputPlane
self.kW = kW
self.kH = kH
self.dW = dW
self.dH = dH
self.padW = padW or 0
self.padH = padH or self.padW
-- Bias is not supported yet, because I don't know how to calculate gradBias :(
self.weight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW)
--self.bias = torch.Tensor(nOutputPlane)
self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW)
--self.gradBias = torch.Tensor(nOutputPlane)
self:reset()
end
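-- Example construction (an illustrative sketch; the argument order follows nn.SpatialConvolution):
--   local conv = nn.SpatialConvolutionLua(3, 16, 3, 3, 1, 1, 1, 1)
--   -- 3 input planes -> 16 output planes, 3x3 kernel, stride 1x1, padding 1x1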
function SpatialConvolutionLua:noBias()
self.bias = nil
self.gradBias = nil
return self
end
function SpatialConvolutionLua:reset(stdv)
if stdv then
stdv = stdv * math.sqrt(3)
else
stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane)
end
if nn.oldSeed then
self.weight:apply(function()
return torch.uniform(-stdv, stdv)
end)
if self.bias then
self.bias:apply(function()
return torch.uniform(-stdv, stdv)
end)
end
else
self.weight:uniform(-stdv, stdv)
if self.bias then
self.bias:uniform(-stdv, stdv)
end
end
end
function SpatialConvolutionLua:updateOutput(input)
-- Backward compatibility
if self.padding then
self.padW = self.padding
self.padH = self.padding
self.padding = nil
end
-- Get initial parameters
local nInputPlane, nOutputPlane = self.nInputPlane, self.nOutputPlane
local kW, kH = self.kW, self.kH
local dW, dH = self.dW, self.dH
local pW, pH = self.padW, self.padH
-- Read parameters of input
local f_no_batch
local batchSize = 0
local numChannels = 0
local inputHeight = 0
local inputWidth = 0
if input:dim() == 4 then -- batch
batchSize = input:size(1)
numChannels = input:size(2)
inputHeight = input:size(3)
inputWidth = input:size(4)
elseif input:dim() == 3 then -- image
f_no_batch = true
batchSize = 1
numChannels = input:size(1)
inputHeight = input:size(2)
inputWidth = input:size(3)
input = input:view(1, numChannels, inputHeight, inputWidth)
else
error('SpatialConvolutionLua:updateOutput - Incorrect number of input dimensions')
end
-- Check input
assert((nInputPlane == numChannels),
'SpatialConvolutionLua:updateOutput - Incorrect input size for nInputPlane')
-- Calculate output height and width
local outputHeight = math.floor((inputHeight + 2*pH - kH) / dH) + 1
local outputWidth = math.floor((inputWidth + 2*pW - kW) / dW) + 1
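-- Worked example of the formula above (illustrative numbers):
-- inputHeight = 5, padH = 1, kH = 3, dH = 2  ->  outputHeight = floor((5 + 2 - 3)/2) + 1 = 3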
-- Create output tensor
local output = torch.Tensor()
:typeAs(input)
:resize(batchSize, nOutputPlane, outputHeight, outputWidth)
:zero()
-- Create a tensor to read 'patches' of input image of size (batchSize x nInputPlane x kH x kW)
local x = torch.Tensor()
:typeAs(input)
:resize(batchSize, nInputPlane, kH, kW)
local xh1, xh2, xw1, xw2
local ih1, ih2, iw1, iw2
local oh, ow
local W, B
for o = 1, nOutputPlane do
oh = 0
-- Read weight and bias
W = self.weight[o]:view(1, nInputPlane, kH, kW)
:expand(batchSize, nInputPlane, kH, kW)
B = 0
if self.bias then
B = self.bias[o]:view(1, nInputPlane):expand(batchSize, nInputPlane)
end
for h = 1 - pH, inputHeight + pH - kH + 1, dH do
oh = oh + 1
ow = 0
for w = 1 - pW, inputWidth + pW - kW + 1, dW do
ow = ow + 1
-- Fill in patch x from input with respect to padding pW and pH
x:zero()
xh1, xh2 = h, h + kH - 1
xw1, xw2 = w, w + kW - 1
ih1, ih2 = 1, inputHeight
iw1, iw2 = 1, inputWidth
ih1, ih2 = math.max(xh1, ih1), math.min(xh2, ih2)
iw1, iw2 = math.max(xw1, iw1), math.min(xw2, iw2)
if (ih2 >= ih1) and (iw2 >= iw1) then
xh1, xh2 = (ih1 - xh1 + 1), (kH - xh2 + ih2)
xw1, xw2 = (iw1 - xw1 + 1), (kW - xw2 + iw2)
assert(xh2 - xh1 == ih2 - ih1,
'SpatialConvolutionLua:updateOutput: Failed to calculate height '
..xh1..' '..xh2..' '..ih1..' '..ih2)
assert(xw2 - xw1 == iw2 - iw1,
'SpatialConvolutionLua:updateOutput: Failed to calculate width '
..xw1..' '..xw2..' '..iw1..' '..iw2)
x[{{}, {}, {xh1, xh2}, {xw1, xw2}}] = input[{{}, {}, {ih1, ih2}, {iw1, iw2}}]
end
-- Multiply weight W by patch x and sum over input planes and kernel positions to get the pixel of output plane o for each batch sample
output[{{}, o, oh, ow}]
:add( torch.cmul(W, x):sum(2):sum(3):sum(4) + B )
end -- for inputWidth
end -- for inputHeight
end -- for nOutputPlane
if f_no_batch then
output:resize(nOutputPlane, outputHeight, outputWidth)
end
self.output = output
return output
end -- function SpatialConvolutionLua:updateOutput
local function getAdjustedGradOutput(gradOutput, _zH, _zW, _zB, _zR, _gradHeight, _gradWidth)
local gradHeight, gradWidth = gradOutput:size(3), gradOutput:size(4)
local _gradOutput
if (_zH <= 0) and (_zW <= 0) then
if (_zB <= 0) and (_zR <= 0) then
-- Just use source gradOutput
_gradOutput = gradOutput
else
-- Add zeroes at right and/or bottom
_gradOutput = torch.Tensor()
:typeAs(gradOutput)
:resize(gradOutput:size(1), gradOutput:size(2), _gradHeight, _gradWidth)
:zero()
_gradOutput[{{}, {}, {1,gradHeight}, {1,gradWidth}}] = gradOutput
end
else
-- Insert zeroes between pixels and/or add right/bottom zeroes border
_gradOutput = torch.Tensor()
:typeAs(gradOutput)
:resize(gradOutput:size(1), gradOutput:size(2), _gradHeight, _gradWidth)
:zero()
for h = 1, gradHeight do
for w = 1, gradWidth do
_gradOutput[{{}, {}, (_zH+1)*(h-1)+1, (_zW+1)*(w-1)+1}] = gradOutput[{{}, {}, h, w}]
end
end
end
return _gradOutput
end -- function getAdjustedGradOutput
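-- Illustration of the zero insertion above (assumed values _zH = _zW = 1, _zB = _zR = 0):
-- a 2x2 gradOutput
--   [ a b ]            [ a 0 b ]
--   [ c d ]   becomes  [ 0 0 0 ]
--                      [ c 0 d ]
-- i.e. one row/column of zeroes is inserted between neighbouring gradOutput pixels.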
local function getReversedWeight(weight)
local nOutputPlane, nInputPlane, kH, kW =
weight:size(1), weight:size(2), weight:size(3), weight:size(4)
weight = weight:view(nOutputPlane, nInputPlane, kH*kW)
local _weight = torch.Tensor()
:typeAs(weight)
:resizeAs(weight)
local reverse = torch.range(kH*kW, 1, -1):long()
for o = 1, weight:size(1) do
for i = 1, weight:size(2) do
_weight[o][i] = weight[o][i]:index(1, reverse)
end
end
_weight:resize(nOutputPlane, nInputPlane, kH, kW)
return _weight
end -- function getReversedWeight
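-- Illustration: each kH x kW slice is reversed element-wise, which for a 2D kernel
-- amounts to a 180-degree rotation, e.g.
--   [ 1 2 ]            [ 4 3 ]
--   [ 3 4 ]   becomes  [ 2 1 ]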
function SpatialConvolutionLua:updateGradInput(input, gradOutput)
-- Get initial parameters
local nInputPlane, nOutputPlane = self.nInputPlane, self.nOutputPlane
local kW, kH = self.kW, self.kH
local dW, dH = self.dW, self.dH
local pW, pH = self.padW, self.padH
-- Read input parameters
local f_no_batch
local batchSize = 0
local numChannels = 0
local inputHeight = 0
local inputWidth = 0
if input:dim() == 4 then -- batch
batchSize = input:size(1)
numChannels = input:size(2)
inputHeight = input:size(3)
inputWidth = input:size(4)
elseif input:dim() == 3 then -- image
f_no_batch = true
batchSize = 1
numChannels = input:size(1)
inputHeight = input:size(2)
inputWidth = input:size(3)
input = input:view(1, numChannels, inputHeight, inputWidth)
else
error('SpatialConvolutionLua:updateGradInput - Incorrect number of input dimensions')
end
-- Check input
assert((nInputPlane == numChannels),
'SpatialConvolutionLua:updateGradInput - Incorrect input size for nInputPlane')
-- Calculate output height and width
local outputHeight = math.floor((inputHeight + 2*pH - kH) / dH) + 1
local outputWidth = math.floor((inputWidth + 2*pW - kW) / dW) + 1
-- Read gradOutput parameters
local gradHeight = 0
local gradWidth = 0
if gradOutput:dim() == 4 then -- batch
gradHeight = gradOutput:size(3)
gradWidth = gradOutput:size(4)
elseif gradOutput:dim() == 3 then -- image
gradHeight = gradOutput:size(2)
gradWidth = gradOutput:size(3)
gradOutput = gradOutput:view(1, gradOutput:size(1), gradHeight, gradWidth)
else
error('SpatialConvolutionLua:updateGradInput - Incorrect number of gradOutput dimensions')
end
-- Check gradOutput
assert((gradOutput:size(1) == batchSize),
'SpatialConvolutionLua:updateGradInput - Incorrect gradOutput size for batchSize')
assert((gradOutput:size(2) == nOutputPlane),
'SpatialConvolutionLua:updateGradInput - Incorrect gradOutput size for nOutputPlane')
assert((gradHeight == outputHeight) and (gradWidth == outputWidth),
'SpatialConvolutionLua:updateGradInput - Incorrect gradOutput size for height/width')
-- The number of zeroes that gradOutput should be padded with
local _pH, _pW = (kH - pH - 1), (kW - pW - 1)
-- The number of zeroes that are inserted between gradOutput pixels
local _zH, _zW = (dH - 1), (dW - 1)
-- The number of zeros added to the bottom and right edges of the gradOutput
local _zB, _zR = (inputHeight + 2*pH - kH) % dH, (inputWidth + 2*pW - kW) % dW
-- Strides for gradOutput with padding _pH,_pW and zeroes _zH,_zW
local _dH, _dW = 1, 1
-- Adjusted _gradOutput height and width
local _gradHeight = gradHeight + _zH*(gradHeight-1) + _zB
local _gradWidth = gradWidth + _zW*(gradWidth-1) + _zR
-- Resulting height and width of transposed convolution
local _inputHeight = dH*(gradHeight - 1) + _zB + kH - 2*pH
local _inputWidth = dW*(gradWidth - 1) + _zR + kW - 2*pW
assert((inputHeight == _inputHeight) and (inputWidth == _inputWidth),
'SpatialConvolutionLua:updateGradInput'
..'- failed to calculate transposed convolution parameters!')
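-- Worked example of the parameters above (illustrative numbers): kH = 3, pH = 1, dH = 2, inputHeight = 5
--   gradHeight   = floor((5 + 2 - 3)/2) + 1 = 3
--   _pH = 3 - 1 - 1 = 1,  _zH = 2 - 1 = 1,  _zB = (5 + 2 - 3) % 2 = 0
--   _gradHeight  = 3 + 1*(3 - 1) + 0 = 5
--   _inputHeight = 2*(3 - 1) + 0 + 3 - 2*1 = 5 = inputHeight, as the assert above requires.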
-- Create adapted _gradOutput for transposed convolution
-- We may need to adjust source gradOutput by inserting zeroes between pixels
-- and/or adding right and bottom zeroes border for "odd cases"
local _gradOutput = getAdjustedGradOutput(gradOutput, _zH, _zW, _zB, _zR, _gradHeight, _gradWidth)
self._gradOutput = _gradOutput
-- Create reversed (flipped) weight
local _weight = getReversedWeight(self.weight)
-- Create resulting gradInput tensor
local gradInput = torch.Tensor()
:typeAs(input)
:resize(batchSize, nInputPlane, inputHeight, inputWidth)
:zero() -- must start from zero: the loop below accumulates into gradInput with :add()
-- Create a tensor to read 'patches' of _gradOutput image of size (batchSize x nInputPlane x kH x kW)
local x = torch.Tensor()
:typeAs(input)
:resize(batchSize, nInputPlane, kH, kW)
local xh1, xh2, xw1, xw2
local oh1, oh2, ow1, ow2
local ih, iw
local W
for o = 1, nOutputPlane do
ih = 0
-- Read weight
W = _weight[o]:view(1, nInputPlane, kH, kW)
:expand(batchSize, nInputPlane, kH, kW)
for h = 1 - _pH, _gradHeight + _pH - kH + 1, _dH do
ih = ih + 1
iw = 0
for w = 1 - _pW, _gradWidth + _pW - kW + 1, _dW do
iw = iw + 1
-- Fill in input patch x from _gradOutput with respect to padding _pW and _pH
x:zero()
xh1, xh2 = h, h + kH - 1
xw1, xw2 = w, w + kW - 1
oh1, oh2 = 1, _gradHeight
ow1, ow2 = 1, _gradWidth
oh1, oh2 = math.max(xh1, oh1), math.min(xh2, oh2)
ow1, ow2 = math.max(xw1, ow1), math.min(xw2, ow2)
if (oh2 >= oh1) and (ow2 >= ow1) then
xh1, xh2 = (oh1 - xh1 + 1), (kH - xh2 + oh2)
xw1, xw2 = (ow1 - xw1 + 1), (kW - xw2 + ow2)
assert(xh2 - xh1 == oh2 - oh1,
'SpatialConvolutionLua:updateGradInput: Failed to calculate height '
..xh1..' '..xh2..' '..oh1..' '..oh2)
assert(xw2 - xw1 == ow2 - ow1,
'SpatialConvolutionLua:updateGradInput: Failed to calculate width '
..xw1..' '..xw2..' '..ow1..' '..ow2)
x[{{}, {}, {xh1, xh2}, {xw1, xw2}}] =
_gradOutput[{{}, {o,o}, {oh1, oh2}, {ow1, ow2}}]
:expand(batchSize, nInputPlane, oh2-oh1+1, ow2-ow1+1)
end
-- Multiply weight W by x and get resulting pixel for every input plane of each batch sample
--print('x:\n'..tostring(x))
--print('W:\n'..tostring(W))
gradInput[{{}, {}, ih, iw}]:add( torch.cmul(W, x):sum(3):sum(4) )
end -- for inputWidth
end -- for inputHeight
end -- for nOutputPlane
if f_no_batch then
gradInput:resize(nInputPlane, inputHeight, inputWidth)
end
self.gradInput = gradInput
return gradInput
end -- function SpatialConvolutionLua:updateGradInput
function SpatialConvolutionLua:accGradParameters(input, gradOutput, scale)
scale = scale or 1
-- Get initial parameters
local nInputPlane, nOutputPlane = self.nInputPlane, self.nOutputPlane
local kW, kH = self.kW, self.kH
local dW, dH = self.dW, self.dH
local pW, pH = self.padW, self.padH
-- Read parameters of input
local f_no_batch
local batchSize = 0
local numChannels = 0
local inputHeight = 0
local inputWidth = 0
if input:dim() == 4 then -- batch
batchSize = input:size(1)
numChannels = input:size(2)
inputHeight = input:size(3)
inputWidth = input:size(4)
elseif input:dim() == 3 then -- image
f_no_batch = true
batchSize = 1
numChannels = input:size(1)
inputHeight = input:size(2)
inputWidth = input:size(3)
input = input:view(1, numChannels, inputHeight, inputWidth)
else
error('SpatialConvolutionLua:accGradParameters - Incorrect number of input dimensions')
end
-- Check input
assert((nInputPlane == numChannels),
'SpatialConvolutionLua:accGradParameters - Incorrect input size for nInputPlane')
-- Calculate output height and width
local outputHeight = math.floor((inputHeight + 2*pH - kH) / dH) + 1
local outputWidth = math.floor((inputWidth + 2*pW - kW) / dW) + 1
-- Read gradOutput parameters
local gradHeight = 0
local gradWidth = 0
if gradOutput:dim() == 4 then -- batch
gradHeight = gradOutput:size(3)
gradWidth = gradOutput:size(4)
elseif gradOutput:dim() == 3 then -- image
gradHeight = gradOutput:size(2)
gradWidth = gradOutput:size(3)
gradOutput = gradOutput:view(1, gradOutput:size(1), gradHeight, gradWidth)
else
error('SpatialConvolutionLua:accGradParameters - Incorrect number of gradOutput dimensions')
end
-- Check gradOutput
assert((gradOutput:size(1) == batchSize),
'SpatialConvolutionLua:accGradParameters - Incorrect gradOutput size for batchSize')
assert((gradOutput:size(2) == nOutputPlane),
'SpatialConvolutionLua:accGradParameters - Incorrect gradOutput size for nOutputPlane')
assert((gradHeight == outputHeight) and (gradWidth == outputWidth),
'SpatialConvolutionLua:accGradParameters - Incorrect gradOutput size for height/width')
-- The number of zeroes that are inserted between gradOutput pixels
local _zH, _zW = (dH - 1), (dW - 1)
-- The number of zeros added to the bottom and right edges of the gradOutput
local _zB, _zR = (inputHeight + 2*pH - kH) % dH, (inputWidth + 2*pW - kW) % dW
-- Strides for gradOutput with padding _pH,_pW and zeroes _zH,_zW
local _dH, _dW = 1, 1
-- Adjusted _gradOutput height and width
local _gradHeight = gradHeight + _zH*(gradHeight-1) + _zB
local _gradWidth = gradWidth + _zW*(gradWidth-1) + _zR
local _kH, _kW = _gradHeight, _gradWidth
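-- Note: for the weight gradient the roles swap -- the (zero-stuffed) gradOutput acts as the
-- kernel that is slid over the input, so its height/width become the effective kernel size _kH x _kW.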
-- Reuse or create adapted _gradOutput for transposed convolution
-- We may need to adjust source gradOutput by inserting zeroes between pixels
-- and/or adding right and bottom zeroes border for "odd cases"
local _gradOutput = self._gradOutput
if (not _gradOutput) or (_gradOutput:size(1) ~= batchSize)
or (_gradOutput:size(2) ~= nOutputPlane)
or (_gradOutput:size(3) ~= _gradHeight) or (_gradOutput:size(4) ~= _gradWidth) then
print('SpatialConvolutionLua:_gradOutput mismatch!')
_gradOutput = getAdjustedGradOutput(gradOutput, _zH, _zW, _zB, _zR, _gradHeight, _gradWidth)
self._gradOutput = _gradOutput
end
-- Get output tensor gradWeight
local gradWeight = self.gradWeight
-- Create a tensor to read 'patches' of input image of size (batchSize x nInputPlane x _kH x _kW)
local x = torch.Tensor()
:typeAs(input)
:resize(batchSize, nInputPlane, _kH, _kW)
local xh1, xh2, xw1, xw2
local ih1, ih2, iw1, iw2
local wh, ww
local G
for o = 1, nOutputPlane do
wh = 0
for h = 1 - pH, inputHeight + pH - _kH + 1, _dH do
wh = wh + 1
ww = 0
for w = 1 - pW, inputWidth + pW - _kW + 1, _dW do
ww = ww + 1
-- Fill in patch x from input with respect to padding pW and pH
x:zero()
xh1, xh2 = h, h + _kH - 1
xw1, xw2 = w, w + _kW - 1
ih1, ih2 = 1, inputHeight
iw1, iw2 = 1, inputWidth
ih1, ih2 = math.max(xh1, ih1), math.min(xh2, ih2)
iw1, iw2 = math.max(xw1, iw1), math.min(xw2, iw2)
if (ih2 >= ih1) and (iw2 >= iw1) then
xh1, xh2 = (ih1 - xh1 + 1), (_kH - xh2 + ih2)
xw1, xw2 = (iw1 - xw1 + 1), (_kW - xw2 + iw2)
assert(xh2 - xh1 == ih2 - ih1,
'SpatialConvolutionLua:accGradParameters: Failed to calculate height '
..xh1..' '..xh2..' '..ih1..' '..ih2)
assert(xw2 - xw1 == iw2 - iw1,
'SpatialConvolutionLua:accGradParameters: Failed to calculate width '
..xw1..' '..xw2..' '..iw1..' '..iw2)
x[{{}, {}, {xh1, xh2}, {xw1, xw2}}] = input[{{}, {}, {ih1, ih2}, {iw1, iw2}}]
end
-- Multiply gradOutput G by x and get resulting weight vector:
-- gradWeight[o][_all_input_planes_][wh][ww]
G = _gradOutput[{{}, {o,o}, {}, {}}]:expand(batchSize, nInputPlane, _kH, _kW)
gradWeight[{o, {}, wh, ww}]:add( scale, torch.cmul(G, x):sum(3):sum(4):sum(1) )
end -- for inputWidth
end -- for inputHeight
end -- for nOutputPlane
--return gradWeight
end -- SpatialConvolutionLua:accGradParameters
function SpatialConvolutionLua:type(type,tensorCache)
self.finput = self.finput and torch.Tensor()
self.fgradInput = self.fgradInput and torch.Tensor()
return parent.type(self,type,tensorCache)
end
function SpatialConvolutionLua:__tostring__()
local s = string.format('%s(%d -> %d, %dx%d', torch.type(self),
self.nInputPlane, self.nOutputPlane, self.kW, self.kH)
if self.dW ~= 1 or self.dH ~= 1 or self.padW ~= 0 or self.padH ~= 0 then
s = s .. string.format(', %d,%d', self.dW, self.dH)
end
if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then
s = s .. ', ' .. self.padW .. ',' .. self.padH
end
if self.bias then
return s .. ')'
else
return s .. ') without bias'
end
end
function SpatialConvolutionLua:clearState()
nn.utils.clear(self, '_gradOutput')
return parent.clearState(self)
end
-- Copyright (c) 2018 Pavel Chernov
-- This code is distributed under MIT License.
--
-- This module was written by Pavel Chernov in February 2018 ([email protected])
-- The reason for it was the bogus implementation of SpatialDepthWiseConvolution in torch/nn
-- (see issue here: https://github.com/torch/nn/issues/1307)
-- This code is based on the paper by Vincent Dumoulin and Francesco Visin:
-- 'A guide to convolution arithmetic for deep learning' (https://arxiv.org/pdf/1603.07285.pdf)
local torch = require 'torch'
local nn = require 'nn'
local SpatialDepthWiseConvolutionLua, parent = torch.class('nn.SpatialDepthWiseConvolutionLua', 'nn.Module')
function SpatialDepthWiseConvolutionLua:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH)
parent.__init(self)
dW = dW or 1
dH = dH or 1
self.nInputPlane = nInputPlane
self.nOutputPlane = nOutputPlane
self.kW = kW
self.kH = kH
self.dW = dW
self.dH = dH
self.padW = padW or 0
self.padH = padH or self.padW
-- Bias is not supported yet, because I don't know how to calculate gradBias :(
self.weight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW)
--self.bias = torch.Tensor(nOutputPlane, nInputPlane)
self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW)
--self.gradBias = torch.Tensor(nOutputPlane, nInputPlane)
self:reset()
end
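-- Example construction (an illustrative sketch):
--   local dwconv = nn.SpatialDepthWiseConvolutionLua(3, 2, 3, 3)
--   -- weight is 2 x 3 x 3 x 3 and forward() yields nOutputPlane*nInputPlane = 6 output channels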
function SpatialDepthWiseConvolutionLua:noBias()
self.bias = nil
self.gradBias = nil
return self
end
function SpatialDepthWiseConvolutionLua:reset(stdv)
if stdv then
stdv = stdv * math.sqrt(3)
else
stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane)
end
if nn.oldSeed then
self.weight:apply(function()
return torch.uniform(-stdv, stdv)
end)
if self.bias then
self.bias:apply(function()
return torch.uniform(-stdv, stdv)
end)
end
else
self.weight:uniform(-stdv, stdv)
if self.bias then
self.bias:uniform(-stdv, stdv)
end
end
end
function SpatialDepthWiseConvolutionLua:updateOutput(input)
-- Backward compatibility
if self.padding then
self.padW = self.padding
self.padH = self.padding
self.padding = nil
end
-- Get initial parameters
local nInputPlane, nOutputPlane = self.nInputPlane, self.nOutputPlane
local kW, kH = self.kW, self.kH
local dW, dH = self.dW, self.dH
local pW, pH = self.padW, self.padH
-- Read parameters of input
local f_no_batch
local batchSize = 0
local numChannels = 0
local inputHeight = 0
local inputWidth = 0
if input:dim() == 4 then -- batch
batchSize = input:size(1)
numChannels = input:size(2)
inputHeight = input:size(3)
inputWidth = input:size(4)
elseif input:dim() == 3 then -- image
f_no_batch = true
batchSize = 1
numChannels = input:size(1)
inputHeight = input:size(2)
inputWidth = input:size(3)
input = input:view(1, numChannels, inputHeight, inputWidth)
else
error('SpatialDepthWiseConvolutionLua:updateOutput - Incorrect number of input dimensions')
end
-- Check input
assert((nInputPlane == numChannels),
'SpatialDepthWiseConvolutionLua:updateOutput - Incorrect input size for nInputPlane')
-- Calculate output height and width
local outputHeight = math.floor((inputHeight + 2*pH - kH) / dH) + 1
local outputWidth = math.floor((inputWidth + 2*pW - kW) / dW) + 1
-- Create output tensor
local output = torch.Tensor()
:typeAs(input)
:resize(batchSize, nInputPlane*nOutputPlane, outputHeight, outputWidth)
:zero()
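-- Output channels are grouped by output plane: channels (o-1)*nInputPlane+1 .. o*nInputPlane
-- hold the per-input-plane responses of kernel o (see the write into output further below).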
--print('Output size: '..batchSize..' '..(nInputPlane*nOutputPlane)..' '..outputHeight..' '..outputWidth) -- debug print, disabled
-- Create a tensor to read 'patches' of input image of size (batchSize x nInputPlane x kH x kW)
local x = torch.Tensor()
:typeAs(input)
:resize(batchSize, nInputPlane, kH, kW)
local xh1, xh2, xw1, xw2
local ih1, ih2, iw1, iw2
local oh, ow
local W, B
for o = 1, nOutputPlane do
oh = 0
-- Read weight and bias
W = self.weight[o]:view(1, nInputPlane, kH, kW)
:expand(batchSize, nInputPlane, kH, kW)
B = 0
if self.bias then
B = self.bias[o]:view(1, nInputPlane):expand(batchSize, nInputPlane)
end
for h = 1 - pH, inputHeight + pH - kH + 1, dH do
oh = oh + 1
ow = 0
for w = 1 - pW, inputWidth + pW - kW + 1, dW do
ow = ow + 1
-- Fill in patch x from input with respect to padding pW and pH
x:zero()
xh1, xh2 = h, h + kH - 1
xw1, xw2 = w, w + kW - 1
ih1, ih2 = 1, inputHeight
iw1, iw2 = 1, inputWidth
ih1, ih2 = math.max(xh1, ih1), math.min(xh2, ih2)
iw1, iw2 = math.max(xw1, iw1), math.min(xw2, iw2)
if (ih2 >= ih1) and (iw2 >= iw1) then
xh1, xh2 = (ih1 - xh1 + 1), (kH - xh2 + ih2)
xw1, xw2 = (iw1 - xw1 + 1), (kW - xw2 + iw2)
assert(xh2 - xh1 == ih2 - ih1,
'SpatialDepthWiseConvolutionLua:updateOutput: Failed to calculate height '
..xh1..' '..xh2..' '..ih1..' '..ih2)
assert(xw2 - xw1 == iw2 - iw1,
'SpatialDepthWiseConvolutionLua:updateOutput: Failed to calculate width '
..xw1..' '..xw2..' '..iw1..' '..iw2)
x[{{}, {}, {xh1, xh2}, {xw1, xw2}}] = input[{{}, {}, {ih1, ih2}, {iw1, iw2}}]
end
-- Multiply weight W by patch x and sum over kernel positions, giving one pixel per (input plane, output plane o) pair for each batch sample
output[{{}, {(o-1)*nInputPlane+1, o*nInputPlane}, oh, ow}]
:add( torch.cmul(W, x):sum(3):sum(4) + B )
end -- for inputWidth
end -- for inputHeight
end -- for nOutputPlane
if f_no_batch then
output:resize(nInputPlane*nOutputPlane, outputHeight, outputWidth)
end
self.output = output
return output
end -- function SpatialDepthWiseConvolutionLua:updateOutput
local function getAdjustedGradOutput(gradOutput, _zH, _zW, _zB, _zR, _gradHeight, _gradWidth)
local gradHeight, gradWidth = gradOutput:size(3), gradOutput:size(4)
local _gradOutput
if (_zH <= 0) and (_zW <= 0) then
if (_zB <= 0) and (_zR <= 0) then
-- Just use source gradOutput
_gradOutput = gradOutput
else
-- Add zeroes at right and/or bottom
_gradOutput = torch.Tensor()
:typeAs(gradOutput)
:resize(gradOutput:size(1), gradOutput:size(2), _gradHeight, _gradWidth)
:zero()
_gradOutput[{{}, {}, {1,gradHeight}, {1,gradWidth}}] = gradOutput
end
else
-- Insert zeroes between pixels and/or add right/bottom zeroes border
_gradOutput = torch.Tensor()
:typeAs(gradOutput)
:resize(gradOutput:size(1), gradOutput:size(2), _gradHeight, _gradWidth)
:zero()
for h = 1, gradHeight do
for w = 1, gradWidth do
_gradOutput[{{}, {}, (_zH+1)*(h-1)+1, (_zW+1)*(w-1)+1}] = gradOutput[{{}, {}, h, w}]
end
end
end
return _gradOutput
end -- function getAdjustedGradOutput
local function getReversedWeight(weight)
local nOutputPlane, nInputPlane, kH, kW =
weight:size(1), weight:size(2), weight:size(3), weight:size(4)
weight = weight:view(nOutputPlane, nInputPlane, kH*kW)
local _weight = torch.Tensor()
:typeAs(weight)
:resizeAs(weight)
local reverse = torch.range(kH*kW, 1, -1):long()
for o = 1, weight:size(1) do
for i = 1, weight:size(2) do
_weight[o][i] = weight[o][i]:index(1, reverse)
end
end
_weight:resize(nOutputPlane, nInputPlane, kH, kW)
return _weight
end -- function getReversedWeight
function SpatialDepthWiseConvolutionLua:updateGradInput(input, gradOutput)
-- Get initial parameters
local nInputPlane, nOutputPlane = self.nInputPlane, self.nOutputPlane
local kW, kH = self.kW, self.kH
local dW, dH = self.dW, self.dH
local pW, pH = self.padW, self.padH
-- Read input parameters
local f_no_batch
local batchSize = 0
local numChannels = 0
local inputHeight = 0
local inputWidth = 0
if input:dim() == 4 then -- batch
batchSize = input:size(1)
numChannels = input:size(2)
inputHeight = input:size(3)
inputWidth = input:size(4)
elseif input:dim() == 3 then -- image
f_no_batch = true
batchSize = 1
numChannels = input:size(1)
inputHeight = input:size(2)
inputWidth = input:size(3)
input = input:view(1, numChannels, inputHeight, inputWidth)
else
error('SpatialDepthWiseConvolutionLua:updateGradInput - Incorrect number of input dimensions')
end
-- Check input
assert((nInputPlane == numChannels),
'SpatialDepthWiseConvolutionLua:updateGradInput - Incorrect input size for nInputPlane')
-- Calculate output height and width
local outputHeight = math.floor((inputHeight + 2*pH - kH) / dH) + 1
local outputWidth = math.floor((inputWidth + 2*pW - kW) / dW) + 1
-- Read gradOutput parameters
local gradHeight = 0
local gradWidth = 0
if gradOutput:dim() == 4 then -- batch
gradHeight = gradOutput:size(3)
gradWidth = gradOutput:size(4)
elseif gradOutput:dim() == 3 then -- image
gradHeight = gradOutput:size(2)
gradWidth = gradOutput:size(3)
gradOutput = gradOutput:view(1, gradOutput:size(1), gradHeight, gradWidth)
else
error('SpatialDepthWiseConvolutionLua:updateGradInput - Incorrect number of gradOutput dimensions')
end
-- Check gradOutput
assert((gradOutput:size(1) == batchSize),
'SpatialDepthWiseConvolutionLua:updateGradInput - Incorrect gradOutput size for batchSize')
assert((gradOutput:size(2) == nOutputPlane*nInputPlane),
'SpatialDepthWiseConvolutionLua:updateGradInput - Incorrect gradOutput size for nOutputPlane*nInputPlane')
assert((gradHeight == outputHeight) and (gradWidth == outputWidth),
'SpatialDepthWiseConvolutionLua:updateGradInput - Incorrect gradOutput size for height/width')
-- The number of zeroes that gradOutput should be padded with
local _pH, _pW = (kH - pH - 1), (kW - pW - 1)
-- The number of zeroes that are inserted between gradOutput pixels
local _zH, _zW = (dH - 1), (dW - 1)
-- The number of zeros added to the bottom and right edges of the gradOutput
local _zB, _zR = (inputHeight + 2*pH - kH) % dH, (inputWidth + 2*pW - kW) % dW
-- Strides for gradOutput with padding _pH,_pW and zeroes _zH,_zW
local _dH, _dW = 1, 1
-- Adjusted _gradOutput height and width
local _gradHeight = gradHeight + _zH*(gradHeight-1) + _zB
local _gradWidth = gradWidth + _zW*(gradWidth-1) + _zR
-- Resulting height and width of transposed convolution
local _inputHeight = dH*(gradHeight - 1) + _zB + kH - 2*pH
local _inputWidth = dW*(gradWidth - 1) + _zR + kW - 2*pW
assert((inputHeight == _inputHeight) and (inputWidth == _inputWidth),
'SpatialDepthWiseConvolutionLua:updateGradInput'
..'- failed to calculate transposed convolution parameters!')
-- Create adapted _gradOutput for transposed convolution
-- We may need to adjust source gradOutput by inserting zeroes between pixels
-- and/or adding right and bottom zeroes border for "odd cases"
local _gradOutput = getAdjustedGradOutput(gradOutput, _zH, _zW, _zB, _zR, _gradHeight, _gradWidth)
self._gradOutput = _gradOutput
-- Create reversed (flipped) weight
local _weight = getReversedWeight(self.weight)
-- Create resulting gradInput tensor
local gradInput = torch.Tensor()
:typeAs(input)
:resize(batchSize, nInputPlane, inputHeight, inputWidth)
:zero() -- must start from zero: the loop below accumulates into gradInput with :add()
-- Create a tensor to read 'patches' of _gradOutput image of size (batchSize x nInputPlane x kH x kW)
local x = torch.Tensor()
:typeAs(input)
:resize(batchSize, nInputPlane, kH, kW)
local xh1, xh2, xw1, xw2
local oh1, oh2, ow1, ow2
local ih, iw
local W
for o = 1, nOutputPlane do
ih = 0
-- Read weight
W = _weight[o]:view(1, nInputPlane, kH, kW)
:expand(batchSize, nInputPlane, kH, kW)
for h = 1 - _pH, _gradHeight + _pH - kH + 1, _dH do
ih = ih + 1
iw = 0
for w = 1 - _pW, _gradWidth + _pW - kW + 1, _dW do
iw = iw + 1
-- Fill in input patch x from _gradOutput with respect to padding _pW and _pH
x:zero()
xh1, xh2 = h, h + kH - 1
xw1, xw2 = w, w + kW - 1
oh1, oh2 = 1, _gradHeight
ow1, ow2 = 1, _gradWidth
oh1, oh2 = math.max(xh1, oh1), math.min(xh2, oh2)
ow1, ow2 = math.max(xw1, ow1), math.min(xw2, ow2)
if (oh2 >= oh1) and (ow2 >= ow1) then
xh1, xh2 = (oh1 - xh1 + 1), (kH - xh2 + oh2)
xw1, xw2 = (ow1 - xw1 + 1), (kW - xw2 + ow2)
assert(xh2 - xh1 == oh2 - oh1,
'SpatialDepthWiseConvolutionLua:updateGradInput: Failed to calculate height '
..xh1..' '..xh2..' '..oh1..' '..oh2)
assert(xw2 - xw1 == ow2 - ow1,
'SpatialDepthWiseConvolutionLua:updateGradInput: Failed to calculate width '
..xw1..' '..xw2..' '..ow1..' '..ow2)
x[{{}, {}, {xh1, xh2}, {xw1, xw2}}] =
_gradOutput[{{}, {(o-1)*nInputPlane+1, o*nInputPlane}, {oh1, oh2}, {ow1, ow2}}]
end
-- Multiply weight W by x and get resulting pixel for every input plane of each batch sample
gradInput[{{}, {}, ih, iw}]:add( torch.cmul(W, x):sum(3):sum(4) )
end -- for inputWidth
end -- for inputHeight
end -- for nOutputPlane
if f_no_batch then
gradInput:resize(nInputPlane, inputHeight, inputWidth)
end
self.gradInput = gradInput
return gradInput
end -- function SpatialDepthWiseConvolutionLua:updateGradInput
function SpatialDepthWiseConvolutionLua:accGradParameters(input, gradOutput, scale)
scale = scale or 1
-- Get initial parameters
local nInputPlane, nOutputPlane = self.nInputPlane, self.nOutputPlane
local kW, kH = self.kW, self.kH
local dW, dH = self.dW, self.dH
local pW, pH = self.padW, self.padH
-- Read parameters of input
local f_no_batch
local batchSize = 0
local numChannels = 0
local inputHeight = 0
local inputWidth = 0
if input:dim() == 4 then -- batch
batchSize = input:size(1)
numChannels = input:size(2)
inputHeight = input:size(3)
inputWidth = input:size(4)
elseif input:dim() == 3 then -- image
f_no_batch = true
batchSize = 1
numChannels = input:size(1)
inputHeight = input:size(2)
inputWidth = input:size(3)
input = input:view(1, numChannels, inputHeight, inputWidth)
else
error('SpatialDepthWiseConvolutionLua:accGradParameters - Incorrect number of input dimensions')
end
-- Check input
assert((nInputPlane == numChannels),
'SpatialDepthWiseConvolutionLua:accGradParameters - Incorrect input size for nInputPlane')
-- Calculate output height and width
local outputHeight = math.floor((inputHeight + 2*pH - kH) / dH) + 1
local outputWidth = math.floor((inputWidth + 2*pW - kW) / dW) + 1
-- Read gradOutput parameters
local gradHeight = 0
local gradWidth = 0
if gradOutput:dim() == 4 then -- batch
gradHeight = gradOutput:size(3)
gradWidth = gradOutput:size(4)
elseif gradOutput:dim() == 3 then -- image
gradHeight = gradOutput:size(2)
gradWidth = gradOutput:size(3)
gradOutput = gradOutput:view(1, gradOutput:size(1), gradHeight, gradWidth)
else
error('SpatialDepthWiseConvolutionLua:accGradParameters - Incorrect number of gradOutput dimensions')
end
-- Check gradOutput
assert((gradOutput:size(1) == batchSize),
'SpatialDepthWiseConvolutionLua:accGradParameters - Incorrect gradOutput size for batchSize')
assert((gradOutput:size(2) == nOutputPlane*nInputPlane),
'SpatialDepthWiseConvolutionLua:accGradParameters - Incorrect gradOutput size for nOutputPlane*nInputPlane')
assert((gradHeight == outputHeight) and (gradWidth == outputWidth),
'SpatialDepthWiseConvolutionLua:accGradParameters - Incorrect gradOutput size for height/width')
-- The number of zeroes that are inserted between gradOutput pixels
local _zH, _zW = (dH - 1), (dW - 1)
-- The number of zeros added to the bottom and right edges of the gradOutput
local _zB, _zR = (inputHeight + 2*pH - kH) % dH, (inputWidth + 2*pW - kW) % dW
-- Strides for gradOutput with padding _pH,_pW and zeroes _zH,_zW
local _dH, _dW = 1, 1
-- Adjusted _gradOutput height and width
local _gradHeight = gradHeight + _zH*(gradHeight-1) + _zB
local _gradWidth = gradWidth + _zW*(gradWidth-1) + _zR
local _kH, _kW = _gradHeight, _gradWidth
-- Create adapted _gradOutput for transposed convolution
-- We may need to adjust source gradOutput by inserting zeroes between pixels
-- and/or adding right and bottom zeroes border for "odd cases"
local _gradOutput = self._gradOutput
if (not _gradOutput) or (_gradOutput:size(1) ~= batchSize)
or (_gradOutput:size(2) ~= nOutputPlane*nInputPlane)
or (_gradOutput:size(3) ~= _gradHeight) or (_gradOutput:size(4) ~= _gradWidth) then
print('_gradOutput mismatch!')
_gradOutput = getAdjustedGradOutput(gradOutput, _zH, _zW, _zB, _zR, _gradHeight, _gradWidth)
self._gradOutput = _gradOutput
end
-- Get output tensor gradWeight
local gradWeight = self.gradWeight
-- Create a tensor to read 'patches' of input image of size (batchSize x nInputPlane x _kH x _kW)
local x = torch.Tensor()
:typeAs(input)
:resize(batchSize, nInputPlane, _kH, _kW)
local xh1, xh2, xw1, xw2
local ih1, ih2, iw1, iw2
local wh, ww
local G
for o = 1, nOutputPlane do
wh = 0
for h = 1 - pH, inputHeight + pH - _kH + 1, _dH do
wh = wh + 1
ww = 0
for w = 1 - pW, inputWidth + pW - _kW + 1, _dW do
ww = ww + 1
-- Fill in patch x from input with respect to padding pW and pH
x:zero()
xh1, xh2 = h, h + _kH - 1
xw1, xw2 = w, w + _kW - 1
ih1, ih2 = 1, inputHeight
iw1, iw2 = 1, inputWidth
ih1, ih2 = math.max(xh1, ih1), math.min(xh2, ih2)
iw1, iw2 = math.max(xw1, iw1), math.min(xw2, iw2)
if (ih2 >= ih1) and (iw2 >= iw1) then
xh1, xh2 = (ih1 - xh1 + 1), (_kH - xh2 + ih2)
xw1, xw2 = (iw1 - xw1 + 1), (_kW - xw2 + iw2)
assert(xh2 - xh1 == ih2 - ih1,
'SpatialDepthWiseConvolutionLua:accGradParameters: Failed to calculate height '
..xh1..' '..xh2..' '..ih1..' '..ih2)
assert(xw2 - xw1 == iw2 - iw1,
'SpatialDepthWiseConvolutionLua:accGradParameters: Failed to calculate width '
..xw1..' '..xw2..' '..iw1..' '..iw2)
x[{{}, {}, {xh1, xh2}, {xw1, xw2}}] = input[{{}, {}, {ih1, ih2}, {iw1, iw2}}]
end
-- Multiply gradOutput G by x and get resulting weight vector:
-- gradWeight[o][_all_input_planes_][wh][ww]
G = _gradOutput[{{}, {(o-1)*nInputPlane+1, o*nInputPlane}, {}, {}}]
gradWeight[{o, {}, wh, ww}]:add( scale, torch.cmul(G, x):sum(3):sum(4):sum(1) )
end -- for inputWidth
end -- for inputHeight
end -- for nOutputPlane
--return gradWeight
end -- SpatialDepthWiseConvolutionLua:accGradParameters
function SpatialDepthWiseConvolutionLua:type(type,tensorCache)
self.finput = self.finput and torch.Tensor()
self.fgradInput = self.fgradInput and torch.Tensor()
return parent.type(self,type,tensorCache)
end
function SpatialDepthWiseConvolutionLua:__tostring__()
local s = string.format('%s(%d -> %d, %dx%d', torch.type(self),
self.nInputPlane, self.nOutputPlane, self.kW, self.kH)
if self.dW ~= 1 or self.dH ~= 1 or self.padW ~= 0 or self.padH ~= 0 then
s = s .. string.format(', %d,%d', self.dW, self.dH)
end
if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then
s = s .. ', ' .. self.padW .. ',' .. self.padH
end
if self.bias then
return s .. ')'
else
return s .. ') without bias'
end
end
function SpatialDepthWiseConvolutionLua:clearState()
nn.utils.clear(self, '_gradOutput')
return parent.clearState(self)
end
-- Copyright (c) 2018 Pavel Chernov
-- This code is distributed under MIT License.
--
-- This test script was written by Pavel Chernov in February 2018 ([email protected])
-- I wrote it to verify my understanding of convolution and its gradient calculation.
-- This code is based on the paper by Vincent Dumoulin and Francesco Visin:
-- 'A guide to convolution arithmetic for deep learning' (https://arxiv.org/pdf/1603.07285.pdf)
local torch = require 'torch'
local nn = require 'nn'
dofile 'SpatialConvolutionLua.lua'
local function test()
local batchSize, nInputPlane, inputHeight, inputWidth =
torch.random(1,2), torch.random(1,3), torch.random(2,6), torch.random(3,7)
local nOutputPlane = torch.random(2,5)
local kW = math.min(inputWidth, torch.random(1,math.floor(inputWidth/2))*2+1)
local kH = math.min(inputHeight, torch.random(1,math.floor(inputHeight/2))*2+1)
local dW, dH = torch.random(1,kW-1), torch.random(1,kH-1)
local pW, pH = torch.random(1,kW-1), torch.random(1,kH-1)
print('Test parameters: input: '..batchSize..' x '..nInputPlane
..' x '..inputHeight..' x '..inputWidth
..' nOutputPlane: '..nOutputPlane
..' kW x kH: '..kW..'x'..kH
..' dW x dH: '..dW..'x'..dH
..' pW x pH: '..pW..'x'..pH)
io.flush()
local input = torch.ones(batchSize, nInputPlane, inputHeight, inputWidth)
print('input:\n'..tostring(input))
local weight = torch.randn(nOutputPlane, nInputPlane, kH, kW)
print('weight:\n'..tostring(weight))
local raw = nn.SpatialConvolution (nInputPlane, nOutputPlane, kW, kH, dW, dH, pW, pH):noBias()
local lua = nn.SpatialConvolutionLua(nInputPlane, nOutputPlane, kW, kH, dW, dH, pW, pH):noBias()
raw.weight:copy(weight)
lua.weight:copy(weight)
local function testStage(name, raw, lua)
io.write(name..' ')
io.flush()
local err2 = torch.norm(torch.csub(raw, lua))
if (err2 > 0.01) then
print('FAILED! Err='..tostring(err2))
print('raw:\n'..tostring(raw))
print('lua:\n'..tostring(lua))
print('error:\n'..tostring(raw:ne(lua)))
error(name..' FAILED!')
end
print('PASSED')
end
local raw_output = raw:forward(input)
local lua_output = lua:forward(input)
testStage('forward', raw_output, lua_output)
print('output: '..tostring(raw_output))
local gradOutput = raw_output
raw:zeroGradParameters()
lua:zeroGradParameters()
local raw_gradInput = raw:backward(input, gradOutput)
local lua_gradInput = lua:backward(input, gradOutput)
print('_gradOutput:\n'..tostring(lua._gradOutput))
--testStage('accGradParameters:_gradOutput', raw._gradOutput, lua._gradOutput)
testStage('updateGradInput', raw_gradInput, lua_gradInput)
raw:accGradParameters(input, gradOutput, 1)
lua:accGradParameters(input, gradOutput, 1)
testStage('accGradParameters:gradWeight', raw.gradWeight, lua.gradWeight)
end
test()
-- Copyright (c) 2018 Pavel Chernov
-- This code is distributed under MIT License.
--
-- This test script was written by Pavel Chernov in February 2018 ([email protected])
-- The reason for it was the bogus implementation of SpatialDepthWiseConvolution in torch/nn
-- (see issue here: https://github.com/torch/nn/issues/1307)
-- This code is based on the paper by Vincent Dumoulin and Francesco Visin:
-- 'A guide to convolution arithmetic for deep learning' (https://arxiv.org/pdf/1603.07285.pdf)
local torch = require 'torch'
local nn = require 'nn'
dofile 'SpatialDepthWiseConvolutionLua.lua'
local function test()
local batchSize, nInputPlane, inputHeight, inputWidth =
torch.random(1,2), torch.random(1,3), torch.random(2,6), torch.random(3,7)
local nOutputPlane = torch.random(2,5)
local kW = math.min(inputWidth, torch.random(1,math.floor(inputWidth/2))*2+1)
local kH = math.min(inputHeight, torch.random(1,math.floor(inputHeight/2))*2+1)
local dW, dH = torch.random(1,kW-1), torch.random(1,kH-1)
local pW, pH = torch.random(1,kW-1), torch.random(1,kH-1)
print('Test parameters: input: '..batchSize..' x '..nInputPlane
..' x '..inputHeight..' x '..inputWidth
..' nOutputPlane: '..nOutputPlane
..' kW x kH: '..kW..'x'..kH
..' dW x dH: '..dW..'x'..dH
..' pW x pH: '..pW..'x'..pH)
io.flush()
local input = torch.ones(batchSize, nInputPlane, inputHeight, inputWidth)
print('input:\n'..tostring(input))
local weight = torch.randn(nOutputPlane, nInputPlane, kH, kW)
print('weight:\n'..tostring(weight))
local raw = nn.Concat(2)
for o = 1, nOutputPlane do
local out = nn.Parallel(2, 2)
for i = 1, nInputPlane do
local seq = nn.Sequential()
local conv = nn.SpatialConvolution(1, 1, kW, kH, dW, dH, pW, pH):noBias()
conv.weight:copy(weight[o][i])
seq:add( nn.Reshape(1, inputHeight, inputWidth) )
seq:add( conv )
out:add( seq )
end
raw:add( out )
end
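-- The reference model above decomposes the depthwise convolution into nOutputPlane x nInputPlane
-- independent single-channel nn.SpatialConvolution modules: nn.Parallel(2, 2) splits the input
-- planes, and nn.Concat(2) stacks the per-plane results along the channel dimension, so the
-- output channel order matches SpatialDepthWiseConvolutionLua.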
local lua = nn.SpatialDepthWiseConvolutionLua(nInputPlane, nOutputPlane, kW, kH, dW, dH, pW, pH):noBias()
lua.weight:copy(weight)
local function testStage(name, raw, lua)
io.write(name..' ')
io.flush()
local err2 = torch.norm(torch.csub(raw, lua))
if (err2 > 0.01) then
print('FAILED! Err='..tostring(err2))
print('raw:\n'..tostring(raw))
print('lua:\n'..tostring(lua))
print('error:\n'..tostring(raw:ne(lua)))
error(name..' FAILED!')
end
print('PASSED')
end
local lua_output = lua:forward(input)
local raw_output = raw:forward(input)
testStage('forward', raw_output, lua_output)
print('raw_output: '..tostring(raw_output))
print('lua_output: '..tostring(lua_output))
local gradOutput = lua_output
raw:zeroGradParameters()
lua:zeroGradParameters()
local raw_gradInput = raw:backward(input, gradOutput)
local lua_gradInput = lua:backward(input, gradOutput)
--print('_gradOutput:\n'..tostring(lua._gradOutput))
testStage('updateGradInput', raw_gradInput, lua_gradInput)
local _, raw_gradWeight = raw:getParameters()
testStage('accGradParameters:gradWeight', raw_gradWeight, lua.gradWeight)
end
test()