add StrongSORT Tracker

This commit is contained in:
Pongsatorn Kanjanasantisak 2025-08-10 01:23:09 +07:00
parent ffc2e99678
commit b7d8b3266f
93 changed files with 20230 additions and 6 deletions

View file

@@ -0,0 +1,122 @@
from __future__ import absolute_import
import torch
from .pcb import *
from .mlfn import *
from .hacnn import *
from .osnet import *
from .senet import *
from .mudeep import *
from .nasnet import *
from .resnet import *
from .densenet import *
from .xception import *
from .osnet_ain import *
from .resnetmid import *
from .shufflenet import *
from .squeezenet import *
from .inceptionv4 import *
from .mobilenetv2 import *
from .resnet_ibn_a import *
from .resnet_ibn_b import *
from .shufflenetv2 import *
from .inceptionresnetv2 import *
__model_factory = {
# image classification models
'resnet18': resnet18,
'resnet34': resnet34,
'resnet50': resnet50,
'resnet101': resnet101,
'resnet152': resnet152,
'resnext50_32x4d': resnext50_32x4d,
'resnext101_32x8d': resnext101_32x8d,
'resnet50_fc512': resnet50_fc512,
'se_resnet50': se_resnet50,
'se_resnet50_fc512': se_resnet50_fc512,
'se_resnet101': se_resnet101,
'se_resnext50_32x4d': se_resnext50_32x4d,
'se_resnext101_32x4d': se_resnext101_32x4d,
'densenet121': densenet121,
'densenet169': densenet169,
'densenet201': densenet201,
'densenet161': densenet161,
'densenet121_fc512': densenet121_fc512,
'inceptionresnetv2': inceptionresnetv2,
'inceptionv4': inceptionv4,
'xception': xception,
'resnet50_ibn_a': resnet50_ibn_a,
'resnet50_ibn_b': resnet50_ibn_b,
# lightweight models
'nasnetamobile': nasnetamobile,
'mobilenetv2_x1_0': mobilenetv2_x1_0,
'mobilenetv2_x1_4': mobilenetv2_x1_4,
'shufflenet': shufflenet,
'squeezenet1_0': squeezenet1_0,
'squeezenet1_0_fc512': squeezenet1_0_fc512,
'squeezenet1_1': squeezenet1_1,
'shufflenet_v2_x0_5': shufflenet_v2_x0_5,
'shufflenet_v2_x1_0': shufflenet_v2_x1_0,
'shufflenet_v2_x1_5': shufflenet_v2_x1_5,
'shufflenet_v2_x2_0': shufflenet_v2_x2_0,
# reid-specific models
'mudeep': MuDeep,
'resnet50mid': resnet50mid,
'hacnn': HACNN,
'pcb_p6': pcb_p6,
'pcb_p4': pcb_p4,
'mlfn': mlfn,
'osnet_x1_0': osnet_x1_0,
'osnet_x0_75': osnet_x0_75,
'osnet_x0_5': osnet_x0_5,
'osnet_x0_25': osnet_x0_25,
'osnet_ibn_x1_0': osnet_ibn_x1_0,
'osnet_ain_x1_0': osnet_ain_x1_0,
'osnet_ain_x0_75': osnet_ain_x0_75,
'osnet_ain_x0_5': osnet_ain_x0_5,
'osnet_ain_x0_25': osnet_ain_x0_25
}
def show_avai_models():
"""Displays available models.
Examples::
>>> from torchreid import models
>>> models.show_avai_models()
"""
print(list(__model_factory.keys()))
def build_model(
name, num_classes, loss='softmax', pretrained=True, use_gpu=True
):
"""A function wrapper for building a model.
Args:
name (str): model name.
num_classes (int): number of training identities.
loss (str, optional): loss function to optimize the model. Currently
supports "softmax" and "triplet". Default is "softmax".
pretrained (bool, optional): whether to load ImageNet-pretrained weights.
Default is True.
use_gpu (bool, optional): whether to use gpu. Default is True.
Returns:
nn.Module
Examples::
>>> from torchreid import models
>>> model = models.build_model('resnet50', 751, loss='softmax')
"""
avai_models = list(__model_factory.keys())
if name not in avai_models:
raise KeyError(
'Unknown model: {}. Must be one of {}'.format(name, avai_models)
)
return __model_factory[name](
num_classes=num_classes,
loss=loss,
pretrained=pretrained,
use_gpu=use_gpu
)
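
A minimal usage sketch for the factory above (illustrative only, not part of the commit; assumes the package imports as torchreid and that OSNet's default 512-d embedding applies). In eval mode the returned module yields feature embeddings rather than logits:

import torch
from torchreid import models

models.show_avai_models()  # prints the keys of __model_factory
model = models.build_model(
    'osnet_x1_0', num_classes=751, loss='softmax', pretrained=False
)
model.eval()  # eval mode: forward() returns features, not logits
with torch.no_grad():
    feats = model(torch.randn(2, 3, 256, 128))  # (batch, RGB, height, width)
print(feats.shape)  # torch.Size([2, 512]) for osnet_x1_0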

View file

@@ -0,0 +1,380 @@
"""
Code source: https://github.com/pytorch/vision
"""
from __future__ import division, absolute_import
import re
from collections import OrderedDict
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils import model_zoo
__all__ = [
'densenet121', 'densenet169', 'densenet201', 'densenet161',
'densenet121_fc512'
]
model_urls = {
'densenet121':
'https://download.pytorch.org/models/densenet121-a639ec97.pth',
'densenet169':
'https://download.pytorch.org/models/densenet169-b2777c0a.pth',
'densenet201':
'https://download.pytorch.org/models/densenet201-c1103571.pth',
'densenet161':
'https://download.pytorch.org/models/densenet161-8d451a50.pth',
}
class _DenseLayer(nn.Sequential):
def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
super(_DenseLayer, self).__init__()
self.add_module('norm1', nn.BatchNorm2d(num_input_features))
self.add_module('relu1', nn.ReLU(inplace=True))
self.add_module(
'conv1',
nn.Conv2d(
num_input_features,
bn_size * growth_rate,
kernel_size=1,
stride=1,
bias=False
)
)
self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate))
self.add_module('relu2', nn.ReLU(inplace=True))
self.add_module(
'conv2',
nn.Conv2d(
bn_size * growth_rate,
growth_rate,
kernel_size=3,
stride=1,
padding=1,
bias=False
)
)
self.drop_rate = drop_rate
def forward(self, x):
new_features = super(_DenseLayer, self).forward(x)
if self.drop_rate > 0:
new_features = F.dropout(
new_features, p=self.drop_rate, training=self.training
)
return torch.cat([x, new_features], 1)
class _DenseBlock(nn.Sequential):
def __init__(
self, num_layers, num_input_features, bn_size, growth_rate, drop_rate
):
super(_DenseBlock, self).__init__()
for i in range(num_layers):
layer = _DenseLayer(
num_input_features + i*growth_rate, growth_rate, bn_size,
drop_rate
)
self.add_module('denselayer%d' % (i+1), layer)
class _Transition(nn.Sequential):
def __init__(self, num_input_features, num_output_features):
super(_Transition, self).__init__()
self.add_module('norm', nn.BatchNorm2d(num_input_features))
self.add_module('relu', nn.ReLU(inplace=True))
self.add_module(
'conv',
nn.Conv2d(
num_input_features,
num_output_features,
kernel_size=1,
stride=1,
bias=False
)
)
self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))
class DenseNet(nn.Module):
"""Densely connected network.
Reference:
Huang et al. Densely Connected Convolutional Networks. CVPR 2017.
Public keys:
- ``densenet121``: DenseNet121.
- ``densenet169``: DenseNet169.
- ``densenet201``: DenseNet201.
- ``densenet161``: DenseNet161.
- ``densenet121_fc512``: DenseNet121 + FC.
"""
def __init__(
self,
num_classes,
loss,
growth_rate=32,
block_config=(6, 12, 24, 16),
num_init_features=64,
bn_size=4,
drop_rate=0,
fc_dims=None,
dropout_p=None,
**kwargs
):
super(DenseNet, self).__init__()
self.loss = loss
# First convolution
self.features = nn.Sequential(
OrderedDict(
[
(
'conv0',
nn.Conv2d(
3,
num_init_features,
kernel_size=7,
stride=2,
padding=3,
bias=False
)
),
('norm0', nn.BatchNorm2d(num_init_features)),
('relu0', nn.ReLU(inplace=True)),
(
'pool0',
nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
),
]
)
)
# Each denseblock
num_features = num_init_features
for i, num_layers in enumerate(block_config):
block = _DenseBlock(
num_layers=num_layers,
num_input_features=num_features,
bn_size=bn_size,
growth_rate=growth_rate,
drop_rate=drop_rate
)
self.features.add_module('denseblock%d' % (i+1), block)
num_features = num_features + num_layers*growth_rate
if i != len(block_config) - 1:
trans = _Transition(
num_input_features=num_features,
num_output_features=num_features // 2
)
self.features.add_module('transition%d' % (i+1), trans)
num_features = num_features // 2
# Final batch norm
self.features.add_module('norm5', nn.BatchNorm2d(num_features))
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.feature_dim = num_features
self.fc = self._construct_fc_layer(fc_dims, num_features, dropout_p)
# Linear layer
self.classifier = nn.Linear(self.feature_dim, num_classes)
self._init_params()
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
"""Constructs fully connected layer.
Args:
fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
input_dim (int): input dimension
dropout_p (float): dropout probability, if None, dropout is unused
"""
if fc_dims is None:
self.feature_dim = input_dim
return None
assert isinstance(
fc_dims, (list, tuple)
), 'fc_dims must be either list or tuple, but got {}'.format(
type(fc_dims)
)
layers = []
for dim in fc_dims:
layers.append(nn.Linear(input_dim, dim))
layers.append(nn.BatchNorm1d(dim))
layers.append(nn.ReLU(inplace=True))
if dropout_p is not None:
layers.append(nn.Dropout(p=dropout_p))
input_dim = dim
self.feature_dim = fc_dims[-1]
return nn.Sequential(*layers)
def _init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def forward(self, x):
f = self.features(x)
f = F.relu(f, inplace=True)
v = self.global_avgpool(f)
v = v.view(v.size(0), -1)
if self.fc is not None:
v = self.fc(v)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError('Unsupported loss: {}'.format(self.loss))
def init_pretrained_weights(model, model_url):
"""Initializes model with pretrained weights.
Layers that don't match the pretrained layers in name or size are kept unchanged.
"""
pretrain_dict = model_zoo.load_url(model_url)
# '.'s are no longer allowed in module names, but previous _DenseLayer
# has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
# They are also in the checkpoints in model_urls. This pattern is used
# to find such keys.
pattern = re.compile(
r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$'
)
for key in list(pretrain_dict.keys()):
res = pattern.match(key)
if res:
new_key = res.group(1) + res.group(2)
pretrain_dict[new_key] = pretrain_dict[key]
del pretrain_dict[key]
model_dict = model.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
model.load_state_dict(model_dict)
"""
Dense network configurations:
--
densenet121: num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16)
densenet169: num_init_features=64, growth_rate=32, block_config=(6, 12, 32, 32)
densenet201: num_init_features=64, growth_rate=32, block_config=(6, 12, 48, 32)
densenet161: num_init_features=96, growth_rate=48, block_config=(6, 12, 36, 24)
"""
def densenet121(num_classes, loss='softmax', pretrained=True, **kwargs):
model = DenseNet(
num_classes=num_classes,
loss=loss,
num_init_features=64,
growth_rate=32,
block_config=(6, 12, 24, 16),
fc_dims=None,
dropout_p=None,
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['densenet121'])
return model
def densenet169(num_classes, loss='softmax', pretrained=True, **kwargs):
model = DenseNet(
num_classes=num_classes,
loss=loss,
num_init_features=64,
growth_rate=32,
block_config=(6, 12, 32, 32),
fc_dims=None,
dropout_p=None,
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['densenet169'])
return model
def densenet201(num_classes, loss='softmax', pretrained=True, **kwargs):
model = DenseNet(
num_classes=num_classes,
loss=loss,
num_init_features=64,
growth_rate=32,
block_config=(6, 12, 48, 32),
fc_dims=None,
dropout_p=None,
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['densenet201'])
return model
def densenet161(num_classes, loss='softmax', pretrained=True, **kwargs):
model = DenseNet(
num_classes=num_classes,
loss=loss,
num_init_features=96,
growth_rate=48,
block_config=(6, 12, 36, 24),
fc_dims=None,
dropout_p=None,
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['densenet161'])
return model
def densenet121_fc512(num_classes, loss='softmax', pretrained=True, **kwargs):
model = DenseNet(
num_classes=num_classes,
loss=loss,
num_init_features=64,
growth_rate=32,
block_config=(6, 12, 24, 16),
fc_dims=[512],
dropout_p=None,
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['densenet121'])
return model
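
A quick sanity check on the constructor's channel arithmetic (an illustrative sketch, not part of the file): each dense block adds num_layers * growth_rate channels and each transition halves the count, which reproduces the well-known final feature dimensions.

def densenet_feature_dim(num_init_features, growth_rate, block_config):
    # mirrors the loop in DenseNet.__init__ above
    num_features = num_init_features
    for i, num_layers in enumerate(block_config):
        num_features += num_layers * growth_rate  # dense block
        if i != len(block_config) - 1:
            num_features //= 2  # transition layer
    return num_features

assert densenet_feature_dim(64, 32, (6, 12, 24, 16)) == 1024  # densenet121
assert densenet_feature_dim(96, 48, (6, 12, 36, 24)) == 2208  # densenet161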

View file

@@ -0,0 +1,414 @@
from __future__ import division, absolute_import
import torch
from torch import nn
from torch.nn import functional as F
__all__ = ['HACNN']
class ConvBlock(nn.Module):
"""Basic convolutional block.
convolution + batch normalization + relu.
Args:
in_c (int): number of input channels.
out_c (int): number of output channels.
k (int or tuple): kernel size.
s (int or tuple): stride.
p (int or tuple): padding.
"""
def __init__(self, in_c, out_c, k, s=1, p=0):
super(ConvBlock, self).__init__()
self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p)
self.bn = nn.BatchNorm2d(out_c)
def forward(self, x):
return F.relu(self.bn(self.conv(x)))
class InceptionA(nn.Module):
def __init__(self, in_channels, out_channels):
super(InceptionA, self).__init__()
mid_channels = out_channels // 4
self.stream1 = nn.Sequential(
ConvBlock(in_channels, mid_channels, 1),
ConvBlock(mid_channels, mid_channels, 3, p=1),
)
self.stream2 = nn.Sequential(
ConvBlock(in_channels, mid_channels, 1),
ConvBlock(mid_channels, mid_channels, 3, p=1),
)
self.stream3 = nn.Sequential(
ConvBlock(in_channels, mid_channels, 1),
ConvBlock(mid_channels, mid_channels, 3, p=1),
)
self.stream4 = nn.Sequential(
nn.AvgPool2d(3, stride=1, padding=1),
ConvBlock(in_channels, mid_channels, 1),
)
def forward(self, x):
s1 = self.stream1(x)
s2 = self.stream2(x)
s3 = self.stream3(x)
s4 = self.stream4(x)
y = torch.cat([s1, s2, s3, s4], dim=1)
return y
class InceptionB(nn.Module):
def __init__(self, in_channels, out_channels):
super(InceptionB, self).__init__()
mid_channels = out_channels // 4
self.stream1 = nn.Sequential(
ConvBlock(in_channels, mid_channels, 1),
ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
)
self.stream2 = nn.Sequential(
ConvBlock(in_channels, mid_channels, 1),
ConvBlock(mid_channels, mid_channels, 3, p=1),
ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
)
self.stream3 = nn.Sequential(
nn.MaxPool2d(3, stride=2, padding=1),
ConvBlock(in_channels, mid_channels * 2, 1),
)
def forward(self, x):
s1 = self.stream1(x)
s2 = self.stream2(x)
s3 = self.stream3(x)
y = torch.cat([s1, s2, s3], dim=1)
return y
class SpatialAttn(nn.Module):
"""Spatial Attention (Sec. 3.1.I.1)"""
def __init__(self):
super(SpatialAttn, self).__init__()
self.conv1 = ConvBlock(1, 1, 3, s=2, p=1)
self.conv2 = ConvBlock(1, 1, 1)
def forward(self, x):
# global cross-channel averaging
x = x.mean(1, keepdim=True)
# 3-by-3 conv
x = self.conv1(x)
# bilinear resizing
x = F.interpolate(
x, (x.size(2) * 2, x.size(3) * 2),
mode='bilinear',
align_corners=True
)
# scaling conv
x = self.conv2(x)
return x
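
Tracing shapes through SpatialAttn (an illustrative snippet using the class above): the stride-2 conv halves the map and the bilinear resize doubles it back, so the attention map matches the input spatially while collapsing channels to one.

import torch

attn = SpatialAttn()
x = torch.randn(2, 128, 40, 16)
print(attn(x).shape)  # torch.Size([2, 1, 40, 16])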
class ChannelAttn(nn.Module):
"""Channel Attention (Sec. 3.1.I.2)"""
def __init__(self, in_channels, reduction_rate=16):
super(ChannelAttn, self).__init__()
assert in_channels % reduction_rate == 0
self.conv1 = ConvBlock(in_channels, in_channels // reduction_rate, 1)
self.conv2 = ConvBlock(in_channels // reduction_rate, in_channels, 1)
def forward(self, x):
# squeeze operation (global average pooling)
x = F.avg_pool2d(x, x.size()[2:])
# excitation operation (2 conv layers)
x = self.conv1(x)
x = self.conv2(x)
return x
class SoftAttn(nn.Module):
"""Soft Attention (Sec. 3.1.I)
Aim: Spatial Attention + Channel Attention
Output: attention maps with shape identical to input.
"""
def __init__(self, in_channels):
super(SoftAttn, self).__init__()
self.spatial_attn = SpatialAttn()
self.channel_attn = ChannelAttn(in_channels)
self.conv = ConvBlock(in_channels, in_channels, 1)
def forward(self, x):
y_spatial = self.spatial_attn(x)
y_channel = self.channel_attn(x)
y = y_spatial * y_channel
y = torch.sigmoid(self.conv(y))
return y
class HardAttn(nn.Module):
"""Hard Attention (Sec. 3.1.II)"""
def __init__(self, in_channels):
super(HardAttn, self).__init__()
self.fc = nn.Linear(in_channels, 4 * 2)
self.init_params()
def init_params(self):
self.fc.weight.data.zero_()
self.fc.bias.data.copy_(
torch.tensor(
[0, -0.75, 0, -0.25, 0, 0.25, 0, 0.75], dtype=torch.float
)
)
def forward(self, x):
# squeeze operation (global average pooling)
x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), x.size(1))
# predict transformation parameters
theta = torch.tanh(self.fc(x))
theta = theta.view(-1, 4, 2)
return theta
class HarmAttn(nn.Module):
"""Harmonious Attention (Sec. 3.1)"""
def __init__(self, in_channels):
super(HarmAttn, self).__init__()
self.soft_attn = SoftAttn(in_channels)
self.hard_attn = HardAttn(in_channels)
def forward(self, x):
y_soft_attn = self.soft_attn(x)
theta = self.hard_attn(x)
return y_soft_attn, theta
class HACNN(nn.Module):
"""Harmonious Attention Convolutional Neural Network.
Reference:
Li et al. Harmonious Attention Network for Person Re-identification. CVPR 2018.
Public keys:
- ``hacnn``: HACNN.
"""
# Args:
# num_classes (int): number of classes to predict
# nchannels (list): number of channels AFTER concatenation
# feat_dim (int): feature dimension for a single stream
# learn_region (bool): whether to learn region features (i.e. local branch)
def __init__(
self,
num_classes,
loss='softmax',
nchannels=[128, 256, 384],
feat_dim=512,
learn_region=True,
use_gpu=True,
**kwargs
):
super(HACNN, self).__init__()
self.loss = loss
self.learn_region = learn_region
self.use_gpu = use_gpu
self.conv = ConvBlock(3, 32, 3, s=2, p=1)
# Construct Inception + HarmAttn blocks
# ============== Block 1 ==============
self.inception1 = nn.Sequential(
InceptionA(32, nchannels[0]),
InceptionB(nchannels[0], nchannels[0]),
)
self.ha1 = HarmAttn(nchannels[0])
# ============== Block 2 ==============
self.inception2 = nn.Sequential(
InceptionA(nchannels[0], nchannels[1]),
InceptionB(nchannels[1], nchannels[1]),
)
self.ha2 = HarmAttn(nchannels[1])
# ============== Block 3 ==============
self.inception3 = nn.Sequential(
InceptionA(nchannels[1], nchannels[2]),
InceptionB(nchannels[2], nchannels[2]),
)
self.ha3 = HarmAttn(nchannels[2])
self.fc_global = nn.Sequential(
nn.Linear(nchannels[2], feat_dim),
nn.BatchNorm1d(feat_dim),
nn.ReLU(),
)
self.classifier_global = nn.Linear(feat_dim, num_classes)
if self.learn_region:
self.init_scale_factors()
self.local_conv1 = InceptionB(32, nchannels[0])
self.local_conv2 = InceptionB(nchannels[0], nchannels[1])
self.local_conv3 = InceptionB(nchannels[1], nchannels[2])
self.fc_local = nn.Sequential(
nn.Linear(nchannels[2] * 4, feat_dim),
nn.BatchNorm1d(feat_dim),
nn.ReLU(),
)
self.classifier_local = nn.Linear(feat_dim, num_classes)
self.feat_dim = feat_dim * 2
else:
self.feat_dim = feat_dim
def init_scale_factors(self):
# initialize scale factors (s_w, s_h) for four regions
self.scale_factors = []
self.scale_factors.append(
torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)
)
self.scale_factors.append(
torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)
)
self.scale_factors.append(
torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)
)
self.scale_factors.append(
torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)
)
def stn(self, x, theta):
"""Performs spatial transform
x: (batch, channel, height, width)
theta: (batch, 2, 3)
"""
grid = F.affine_grid(theta, x.size())
x = F.grid_sample(x, grid)
return x
def transform_theta(self, theta_i, region_idx):
"""Transforms theta to include (s_w, s_h), resulting in (batch, 2, 3)"""
scale_factors = self.scale_factors[region_idx]
theta = torch.zeros(theta_i.size(0), 2, 3)
theta[:, :, :2] = scale_factors
theta[:, :, -1] = theta_i
if self.use_gpu:
theta = theta.cuda()
return theta
def forward(self, x):
assert x.size(2) == 160 and x.size(3) == 64, \
'Input size does not match, expected (160, 64) but got ({}, {})'.format(x.size(2), x.size(3))
x = self.conv(x)
# ============== Block 1 ==============
# global branch
x1 = self.inception1(x)
x1_attn, x1_theta = self.ha1(x1)
x1_out = x1 * x1_attn
# local branch
if self.learn_region:
x1_local_list = []
for region_idx in range(4):
x1_theta_i = x1_theta[:, region_idx, :]
x1_theta_i = self.transform_theta(x1_theta_i, region_idx)
x1_trans_i = self.stn(x, x1_theta_i)
x1_trans_i = F.interpolate(
x1_trans_i, (24, 28), mode='bilinear', align_corners=True
)
x1_local_i = self.local_conv1(x1_trans_i)
x1_local_list.append(x1_local_i)
# ============== Block 2 ==============
# global branch
x2 = self.inception2(x1_out)
x2_attn, x2_theta = self.ha2(x2)
x2_out = x2 * x2_attn
# local branch
if self.learn_region:
x2_local_list = []
for region_idx in range(4):
x2_theta_i = x2_theta[:, region_idx, :]
x2_theta_i = self.transform_theta(x2_theta_i, region_idx)
x2_trans_i = self.stn(x1_out, x2_theta_i)
x2_trans_i = F.interpolate(
x2_trans_i, (12, 14), mode='bilinear', align_corners=True
)
x2_local_i = x2_trans_i + x1_local_list[region_idx]
x2_local_i = self.local_conv2(x2_local_i)
x2_local_list.append(x2_local_i)
# ============== Block 3 ==============
# global branch
x3 = self.inception3(x2_out)
x3_attn, x3_theta = self.ha3(x3)
x3_out = x3 * x3_attn
# local branch
if self.learn_region:
x3_local_list = []
for region_idx in range(4):
x3_theta_i = x3_theta[:, region_idx, :]
x3_theta_i = self.transform_theta(x3_theta_i, region_idx)
x3_trans_i = self.stn(x2_out, x3_theta_i)
x3_trans_i = F.interpolate(
x3_trans_i, (6, 7), mode='bilinear', align_corners=True
)
x3_local_i = x3_trans_i + x2_local_list[region_idx]
x3_local_i = self.local_conv3(x3_local_i)
x3_local_list.append(x3_local_i)
# ============== Feature generation ==============
# global branch
x_global = F.avg_pool2d(x3_out,
x3_out.size()[2:]
).view(x3_out.size(0), x3_out.size(1))
x_global = self.fc_global(x_global)
# local branch
if self.learn_region:
x_local_list = []
for region_idx in range(4):
x_local_i = x3_local_list[region_idx]
x_local_i = F.avg_pool2d(x_local_i,
x_local_i.size()[2:]
).view(x_local_i.size(0), -1)
x_local_list.append(x_local_i)
x_local = torch.cat(x_local_list, 1)
x_local = self.fc_local(x_local)
if not self.training:
# l2 normalization before concatenation
if self.learn_region:
x_global = x_global / x_global.norm(p=2, dim=1, keepdim=True)
x_local = x_local / x_local.norm(p=2, dim=1, keepdim=True)
return torch.cat([x_global, x_local], 1)
else:
return x_global
prelogits_global = self.classifier_global(x_global)
if self.learn_region:
prelogits_local = self.classifier_local(x_local)
if self.loss == 'softmax':
if self.learn_region:
return (prelogits_global, prelogits_local)
else:
return prelogits_global
elif self.loss == 'triplet':
if self.learn_region:
return (prelogits_global, prelogits_local), (x_global, x_local)
else:
return prelogits_global, x_global
else:
raise KeyError("Unsupported loss: {}".format(self.loss))

View file

@@ -0,0 +1,361 @@
"""
Code imported from https://github.com/Cadene/pretrained-models.pytorch
"""
from __future__ import division, absolute_import
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
__all__ = ['inceptionresnetv2']
pretrained_settings = {
'inceptionresnetv2': {
'imagenet': {
'url':
'http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth',
'input_space': 'RGB',
'input_size': [3, 299, 299],
'input_range': [0, 1],
'mean': [0.5, 0.5, 0.5],
'std': [0.5, 0.5, 0.5],
'num_classes': 1000
},
'imagenet+background': {
'url':
'http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth',
'input_space': 'RGB',
'input_size': [3, 299, 299],
'input_range': [0, 1],
'mean': [0.5, 0.5, 0.5],
'std': [0.5, 0.5, 0.5],
'num_classes': 1001
}
}
}
class BasicConv2d(nn.Module):
def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
super(BasicConv2d, self).__init__()
self.conv = nn.Conv2d(
in_planes,
out_planes,
kernel_size=kernel_size,
stride=stride,
padding=padding,
bias=False
) # verify bias false
self.bn = nn.BatchNorm2d(
out_planes,
eps=0.001, # value found in tensorflow
momentum=0.1, # default pytorch value
affine=True
)
self.relu = nn.ReLU(inplace=False)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Mixed_5b(nn.Module):
def __init__(self):
super(Mixed_5b, self).__init__()
self.branch0 = BasicConv2d(192, 96, kernel_size=1, stride=1)
self.branch1 = nn.Sequential(
BasicConv2d(192, 48, kernel_size=1, stride=1),
BasicConv2d(48, 64, kernel_size=5, stride=1, padding=2)
)
self.branch2 = nn.Sequential(
BasicConv2d(192, 64, kernel_size=1, stride=1),
BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),
BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1)
)
self.branch3 = nn.Sequential(
nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
BasicConv2d(192, 64, kernel_size=1, stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
x3 = self.branch3(x)
out = torch.cat((x0, x1, x2, x3), 1)
return out
class Block35(nn.Module):
def __init__(self, scale=1.0):
super(Block35, self).__init__()
self.scale = scale
self.branch0 = BasicConv2d(320, 32, kernel_size=1, stride=1)
self.branch1 = nn.Sequential(
BasicConv2d(320, 32, kernel_size=1, stride=1),
BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1)
)
self.branch2 = nn.Sequential(
BasicConv2d(320, 32, kernel_size=1, stride=1),
BasicConv2d(32, 48, kernel_size=3, stride=1, padding=1),
BasicConv2d(48, 64, kernel_size=3, stride=1, padding=1)
)
self.conv2d = nn.Conv2d(128, 320, kernel_size=1, stride=1)
self.relu = nn.ReLU(inplace=False)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
out = torch.cat((x0, x1, x2), 1)
out = self.conv2d(out)
out = out * self.scale + x
out = self.relu(out)
return out
class Mixed_6a(nn.Module):
def __init__(self):
super(Mixed_6a, self).__init__()
self.branch0 = BasicConv2d(320, 384, kernel_size=3, stride=2)
self.branch1 = nn.Sequential(
BasicConv2d(320, 256, kernel_size=1, stride=1),
BasicConv2d(256, 256, kernel_size=3, stride=1, padding=1),
BasicConv2d(256, 384, kernel_size=3, stride=2)
)
self.branch2 = nn.MaxPool2d(3, stride=2)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
out = torch.cat((x0, x1, x2), 1)
return out
class Block17(nn.Module):
def __init__(self, scale=1.0):
super(Block17, self).__init__()
self.scale = scale
self.branch0 = BasicConv2d(1088, 192, kernel_size=1, stride=1)
self.branch1 = nn.Sequential(
BasicConv2d(1088, 128, kernel_size=1, stride=1),
BasicConv2d(
128, 160, kernel_size=(1, 7), stride=1, padding=(0, 3)
),
BasicConv2d(
160, 192, kernel_size=(7, 1), stride=1, padding=(3, 0)
)
)
self.conv2d = nn.Conv2d(384, 1088, kernel_size=1, stride=1)
self.relu = nn.ReLU(inplace=False)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
out = torch.cat((x0, x1), 1)
out = self.conv2d(out)
out = out * self.scale + x
out = self.relu(out)
return out
class Mixed_7a(nn.Module):
def __init__(self):
super(Mixed_7a, self).__init__()
self.branch0 = nn.Sequential(
BasicConv2d(1088, 256, kernel_size=1, stride=1),
BasicConv2d(256, 384, kernel_size=3, stride=2)
)
self.branch1 = nn.Sequential(
BasicConv2d(1088, 256, kernel_size=1, stride=1),
BasicConv2d(256, 288, kernel_size=3, stride=2)
)
self.branch2 = nn.Sequential(
BasicConv2d(1088, 256, kernel_size=1, stride=1),
BasicConv2d(256, 288, kernel_size=3, stride=1, padding=1),
BasicConv2d(288, 320, kernel_size=3, stride=2)
)
self.branch3 = nn.MaxPool2d(3, stride=2)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
x3 = self.branch3(x)
out = torch.cat((x0, x1, x2, x3), 1)
return out
class Block8(nn.Module):
def __init__(self, scale=1.0, noReLU=False):
super(Block8, self).__init__()
self.scale = scale
self.noReLU = noReLU
self.branch0 = BasicConv2d(2080, 192, kernel_size=1, stride=1)
self.branch1 = nn.Sequential(
BasicConv2d(2080, 192, kernel_size=1, stride=1),
BasicConv2d(
192, 224, kernel_size=(1, 3), stride=1, padding=(0, 1)
),
BasicConv2d(
224, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)
)
)
self.conv2d = nn.Conv2d(448, 2080, kernel_size=1, stride=1)
if not self.noReLU:
self.relu = nn.ReLU(inplace=False)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
out = torch.cat((x0, x1), 1)
out = self.conv2d(out)
out = out * self.scale + x
if not self.noReLU:
out = self.relu(out)
return out
# ----------------
# Model Definition
# ----------------
class InceptionResNetV2(nn.Module):
"""Inception-ResNet-V2.
Reference:
Szegedy et al. Inception-v4, Inception-ResNet and the Impact of Residual
Connections on Learning. AAAI 2017.
Public keys:
- ``inceptionresnetv2``: Inception-ResNet-V2.
"""
def __init__(self, num_classes, loss='softmax', **kwargs):
super(InceptionResNetV2, self).__init__()
self.loss = loss
# Modules
self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2)
self.conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1)
self.conv2d_2b = BasicConv2d(
32, 64, kernel_size=3, stride=1, padding=1
)
self.maxpool_3a = nn.MaxPool2d(3, stride=2)
self.conv2d_3b = BasicConv2d(64, 80, kernel_size=1, stride=1)
self.conv2d_4a = BasicConv2d(80, 192, kernel_size=3, stride=1)
self.maxpool_5a = nn.MaxPool2d(3, stride=2)
self.mixed_5b = Mixed_5b()
self.repeat = nn.Sequential(
Block35(scale=0.17), Block35(scale=0.17), Block35(scale=0.17),
Block35(scale=0.17), Block35(scale=0.17), Block35(scale=0.17),
Block35(scale=0.17), Block35(scale=0.17), Block35(scale=0.17),
Block35(scale=0.17)
)
self.mixed_6a = Mixed_6a()
self.repeat_1 = nn.Sequential(
Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),
Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),
Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),
Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),
Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),
Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),
Block17(scale=0.10), Block17(scale=0.10)
)
self.mixed_7a = Mixed_7a()
self.repeat_2 = nn.Sequential(
Block8(scale=0.20), Block8(scale=0.20), Block8(scale=0.20),
Block8(scale=0.20), Block8(scale=0.20), Block8(scale=0.20),
Block8(scale=0.20), Block8(scale=0.20), Block8(scale=0.20)
)
self.block8 = Block8(noReLU=True)
self.conv2d_7b = BasicConv2d(2080, 1536, kernel_size=1, stride=1)
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(1536, num_classes)
def load_imagenet_weights(self):
settings = pretrained_settings['inceptionresnetv2']['imagenet']
pretrain_dict = model_zoo.load_url(settings['url'])
model_dict = self.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
self.load_state_dict(model_dict)
def featuremaps(self, x):
x = self.conv2d_1a(x)
x = self.conv2d_2a(x)
x = self.conv2d_2b(x)
x = self.maxpool_3a(x)
x = self.conv2d_3b(x)
x = self.conv2d_4a(x)
x = self.maxpool_5a(x)
x = self.mixed_5b(x)
x = self.repeat(x)
x = self.mixed_6a(x)
x = self.repeat_1(x)
x = self.mixed_7a(x)
x = self.repeat_2(x)
x = self.block8(x)
x = self.conv2d_7b(x)
return x
def forward(self, x):
f = self.featuremaps(x)
v = self.global_avgpool(f)
v = v.view(v.size(0), -1)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError('Unsupported loss: {}'.format(self.loss))
def inceptionresnetv2(num_classes, loss='softmax', pretrained=True, **kwargs):
model = InceptionResNetV2(num_classes=num_classes, loss=loss, **kwargs)
if pretrained:
model.load_imagenet_weights()
return model
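
A shape check against the module above (illustrative only): with the canonical 299x299 input the trunk ends at 1536 channels, so eval mode returns 1536-dimensional features.

import torch

model = InceptionResNetV2(num_classes=10)  # loss defaults to 'softmax'
model.eval()  # eval mode: forward() returns features
with torch.no_grad():
    v = model(torch.randn(1, 3, 299, 299))
print(v.shape)  # torch.Size([1, 1536])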

View file

@@ -0,0 +1,381 @@
from __future__ import division, absolute_import
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
__all__ = ['inceptionv4']
"""
Code imported from https://github.com/Cadene/pretrained-models.pytorch
"""
pretrained_settings = {
'inceptionv4': {
'imagenet': {
'url':
'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth',
'input_space': 'RGB',
'input_size': [3, 299, 299],
'input_range': [0, 1],
'mean': [0.5, 0.5, 0.5],
'std': [0.5, 0.5, 0.5],
'num_classes': 1000
},
'imagenet+background': {
'url':
'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth',
'input_space': 'RGB',
'input_size': [3, 299, 299],
'input_range': [0, 1],
'mean': [0.5, 0.5, 0.5],
'std': [0.5, 0.5, 0.5],
'num_classes': 1001
}
}
}
class BasicConv2d(nn.Module):
def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
super(BasicConv2d, self).__init__()
self.conv = nn.Conv2d(
in_planes,
out_planes,
kernel_size=kernel_size,
stride=stride,
padding=padding,
bias=False
) # verify bias false
self.bn = nn.BatchNorm2d(
out_planes,
eps=0.001, # value found in tensorflow
momentum=0.1, # default pytorch value
affine=True
)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Mixed_3a(nn.Module):
def __init__(self):
super(Mixed_3a, self).__init__()
self.maxpool = nn.MaxPool2d(3, stride=2)
self.conv = BasicConv2d(64, 96, kernel_size=3, stride=2)
def forward(self, x):
x0 = self.maxpool(x)
x1 = self.conv(x)
out = torch.cat((x0, x1), 1)
return out
class Mixed_4a(nn.Module):
def __init__(self):
super(Mixed_4a, self).__init__()
self.branch0 = nn.Sequential(
BasicConv2d(160, 64, kernel_size=1, stride=1),
BasicConv2d(64, 96, kernel_size=3, stride=1)
)
self.branch1 = nn.Sequential(
BasicConv2d(160, 64, kernel_size=1, stride=1),
BasicConv2d(64, 64, kernel_size=(1, 7), stride=1, padding=(0, 3)),
BasicConv2d(64, 64, kernel_size=(7, 1), stride=1, padding=(3, 0)),
BasicConv2d(64, 96, kernel_size=(3, 3), stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
out = torch.cat((x0, x1), 1)
return out
class Mixed_5a(nn.Module):
def __init__(self):
super(Mixed_5a, self).__init__()
self.conv = BasicConv2d(192, 192, kernel_size=3, stride=2)
self.maxpool = nn.MaxPool2d(3, stride=2)
def forward(self, x):
x0 = self.conv(x)
x1 = self.maxpool(x)
out = torch.cat((x0, x1), 1)
return out
class Inception_A(nn.Module):
def __init__(self):
super(Inception_A, self).__init__()
self.branch0 = BasicConv2d(384, 96, kernel_size=1, stride=1)
self.branch1 = nn.Sequential(
BasicConv2d(384, 64, kernel_size=1, stride=1),
BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1)
)
self.branch2 = nn.Sequential(
BasicConv2d(384, 64, kernel_size=1, stride=1),
BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),
BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1)
)
self.branch3 = nn.Sequential(
nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
BasicConv2d(384, 96, kernel_size=1, stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
x3 = self.branch3(x)
out = torch.cat((x0, x1, x2, x3), 1)
return out
class Reduction_A(nn.Module):
def __init__(self):
super(Reduction_A, self).__init__()
self.branch0 = BasicConv2d(384, 384, kernel_size=3, stride=2)
self.branch1 = nn.Sequential(
BasicConv2d(384, 192, kernel_size=1, stride=1),
BasicConv2d(192, 224, kernel_size=3, stride=1, padding=1),
BasicConv2d(224, 256, kernel_size=3, stride=2)
)
self.branch2 = nn.MaxPool2d(3, stride=2)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
out = torch.cat((x0, x1, x2), 1)
return out
class Inception_B(nn.Module):
def __init__(self):
super(Inception_B, self).__init__()
self.branch0 = BasicConv2d(1024, 384, kernel_size=1, stride=1)
self.branch1 = nn.Sequential(
BasicConv2d(1024, 192, kernel_size=1, stride=1),
BasicConv2d(
192, 224, kernel_size=(1, 7), stride=1, padding=(0, 3)
),
BasicConv2d(
224, 256, kernel_size=(7, 1), stride=1, padding=(3, 0)
)
)
self.branch2 = nn.Sequential(
BasicConv2d(1024, 192, kernel_size=1, stride=1),
BasicConv2d(
192, 192, kernel_size=(7, 1), stride=1, padding=(3, 0)
),
BasicConv2d(
192, 224, kernel_size=(1, 7), stride=1, padding=(0, 3)
),
BasicConv2d(
224, 224, kernel_size=(7, 1), stride=1, padding=(3, 0)
),
BasicConv2d(
224, 256, kernel_size=(1, 7), stride=1, padding=(0, 3)
)
)
self.branch3 = nn.Sequential(
nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
BasicConv2d(1024, 128, kernel_size=1, stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
x3 = self.branch3(x)
out = torch.cat((x0, x1, x2, x3), 1)
return out
class Reduction_B(nn.Module):
def __init__(self):
super(Reduction_B, self).__init__()
self.branch0 = nn.Sequential(
BasicConv2d(1024, 192, kernel_size=1, stride=1),
BasicConv2d(192, 192, kernel_size=3, stride=2)
)
self.branch1 = nn.Sequential(
BasicConv2d(1024, 256, kernel_size=1, stride=1),
BasicConv2d(
256, 256, kernel_size=(1, 7), stride=1, padding=(0, 3)
),
BasicConv2d(
256, 320, kernel_size=(7, 1), stride=1, padding=(3, 0)
), BasicConv2d(320, 320, kernel_size=3, stride=2)
)
self.branch2 = nn.MaxPool2d(3, stride=2)
def forward(self, x):
x0 = self.branch0(x)
x1 = self.branch1(x)
x2 = self.branch2(x)
out = torch.cat((x0, x1, x2), 1)
return out
class Inception_C(nn.Module):
def __init__(self):
super(Inception_C, self).__init__()
self.branch0 = BasicConv2d(1536, 256, kernel_size=1, stride=1)
self.branch1_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1)
self.branch1_1a = BasicConv2d(
384, 256, kernel_size=(1, 3), stride=1, padding=(0, 1)
)
self.branch1_1b = BasicConv2d(
384, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)
)
self.branch2_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1)
self.branch2_1 = BasicConv2d(
384, 448, kernel_size=(3, 1), stride=1, padding=(1, 0)
)
self.branch2_2 = BasicConv2d(
448, 512, kernel_size=(1, 3), stride=1, padding=(0, 1)
)
self.branch2_3a = BasicConv2d(
512, 256, kernel_size=(1, 3), stride=1, padding=(0, 1)
)
self.branch2_3b = BasicConv2d(
512, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)
)
self.branch3 = nn.Sequential(
nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
BasicConv2d(1536, 256, kernel_size=1, stride=1)
)
def forward(self, x):
x0 = self.branch0(x)
x1_0 = self.branch1_0(x)
x1_1a = self.branch1_1a(x1_0)
x1_1b = self.branch1_1b(x1_0)
x1 = torch.cat((x1_1a, x1_1b), 1)
x2_0 = self.branch2_0(x)
x2_1 = self.branch2_1(x2_0)
x2_2 = self.branch2_2(x2_1)
x2_3a = self.branch2_3a(x2_2)
x2_3b = self.branch2_3b(x2_2)
x2 = torch.cat((x2_3a, x2_3b), 1)
x3 = self.branch3(x)
out = torch.cat((x0, x1, x2, x3), 1)
return out
class InceptionV4(nn.Module):
"""Inception-v4.
Reference:
Szegedy et al. Inception-v4, Inception-ResNet and the Impact of Residual
Connections on Learning. AAAI 2017.
Public keys:
- ``inceptionv4``: InceptionV4.
"""
def __init__(self, num_classes, loss, **kwargs):
super(InceptionV4, self).__init__()
self.loss = loss
self.features = nn.Sequential(
BasicConv2d(3, 32, kernel_size=3, stride=2),
BasicConv2d(32, 32, kernel_size=3, stride=1),
BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1),
Mixed_3a(),
Mixed_4a(),
Mixed_5a(),
Inception_A(),
Inception_A(),
Inception_A(),
Inception_A(),
Reduction_A(), # Mixed_6a
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Inception_B(),
Reduction_B(), # Mixed_7a
Inception_C(),
Inception_C(),
Inception_C()
)
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.classifier = nn.Linear(1536, num_classes)
def forward(self, x):
f = self.features(x)
v = self.global_avgpool(f)
v = v.view(v.size(0), -1)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError('Unsupported loss: {}'.format(self.loss))
def init_pretrained_weights(model, model_url):
"""Initializes model with pretrained weights.
Layers that don't match the pretrained layers in name or size are kept unchanged.
"""
pretrain_dict = model_zoo.load_url(model_url)
model_dict = model.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
model.load_state_dict(model_dict)
def inceptionv4(num_classes, loss='softmax', pretrained=True, **kwargs):
model = InceptionV4(num_classes, loss, **kwargs)
if pretrained:
model_url = pretrained_settings['inceptionv4']['imagenet']['url']
init_pretrained_weights(model, model_url)
return model
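
A toy illustration of the filtering rule in init_pretrained_weights above (hypothetical state dicts, for illustration only): only keys that exist in the target model with identical tensor shapes are copied, so a 1000-class ImageNet classifier head is silently skipped when num_classes differs.

import torch

model_dict = {
    'conv.weight': torch.zeros(8, 3, 3, 3),
    'classifier.weight': torch.zeros(751, 1536),
}
pretrain_dict = {
    'conv.weight': torch.ones(8, 3, 3, 3),
    'classifier.weight': torch.ones(1000, 1536),
}
kept = {
    k: v
    for k, v in pretrain_dict.items()
    if k in model_dict and model_dict[k].size() == v.size()
}
print(sorted(kept))  # ['conv.weight'] -- the mismatched classifier is dropped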

View file

@@ -0,0 +1,269 @@
from __future__ import division, absolute_import
import torch
import torch.utils.model_zoo as model_zoo
from torch import nn
from torch.nn import functional as F
__all__ = ['mlfn']
model_urls = {
# training epoch = 5, top1 = 51.6
'imagenet':
'https://mega.nz/#!YHxAhaxC!yu9E6zWl0x5zscSouTdbZu8gdFFytDdl-RAdD2DEfpk',
}
class MLFNBlock(nn.Module):
def __init__(
self, in_channels, out_channels, stride, fsm_channels, groups=32
):
super(MLFNBlock, self).__init__()
self.groups = groups
mid_channels = out_channels // 2
# Factor Modules
self.fm_conv1 = nn.Conv2d(in_channels, mid_channels, 1, bias=False)
self.fm_bn1 = nn.BatchNorm2d(mid_channels)
self.fm_conv2 = nn.Conv2d(
mid_channels,
mid_channels,
3,
stride=stride,
padding=1,
bias=False,
groups=self.groups
)
self.fm_bn2 = nn.BatchNorm2d(mid_channels)
self.fm_conv3 = nn.Conv2d(mid_channels, out_channels, 1, bias=False)
self.fm_bn3 = nn.BatchNorm2d(out_channels)
# Factor Selection Module
self.fsm = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(in_channels, fsm_channels[0], 1),
nn.BatchNorm2d(fsm_channels[0]),
nn.ReLU(inplace=True),
nn.Conv2d(fsm_channels[0], fsm_channels[1], 1),
nn.BatchNorm2d(fsm_channels[1]),
nn.ReLU(inplace=True),
nn.Conv2d(fsm_channels[1], self.groups, 1),
nn.BatchNorm2d(self.groups),
nn.Sigmoid(),
)
self.downsample = None
if in_channels != out_channels or stride > 1:
self.downsample = nn.Sequential(
nn.Conv2d(
in_channels, out_channels, 1, stride=stride, bias=False
),
nn.BatchNorm2d(out_channels),
)
def forward(self, x):
residual = x
s = self.fsm(x)
# reduce dimension
x = self.fm_conv1(x)
x = self.fm_bn1(x)
x = F.relu(x, inplace=True)
# group convolution
x = self.fm_conv2(x)
x = self.fm_bn2(x)
x = F.relu(x, inplace=True)
# factor selection
b, c = x.size(0), x.size(1)
n = c // self.groups
ss = s.repeat(1, n, 1, 1) # from (b, g, 1, 1) to (b, g*n=c, 1, 1)
ss = ss.view(b, n, self.groups, 1, 1)
ss = ss.permute(0, 2, 1, 3, 4).contiguous()
ss = ss.view(b, c, 1, 1)
x = ss * x
# recover dimension
x = self.fm_conv3(x)
x = self.fm_bn3(x)
x = F.relu(x, inplace=True)
if self.downsample is not None:
residual = self.downsample(residual)
return F.relu(residual + x, inplace=True), s
class MLFN(nn.Module):
"""Multi-Level Factorisation Net.
Reference:
Chang et al. Multi-Level Factorisation Net for
Person Re-Identification. CVPR 2018.
Public keys:
- ``mlfn``: MLFN (Multi-Level Factorisation Net).
"""
def __init__(
self,
num_classes,
loss='softmax',
groups=32,
channels=[64, 256, 512, 1024, 2048],
embed_dim=1024,
**kwargs
):
super(MLFN, self).__init__()
self.loss = loss
self.groups = groups
# first convolutional layer
self.conv1 = nn.Conv2d(3, channels[0], 7, stride=2, padding=3)
self.bn1 = nn.BatchNorm2d(channels[0])
self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
# main body
self.feature = nn.ModuleList(
[
# layer 1-3
MLFNBlock(channels[0], channels[1], 1, [128, 64], self.groups),
MLFNBlock(channels[1], channels[1], 1, [128, 64], self.groups),
MLFNBlock(channels[1], channels[1], 1, [128, 64], self.groups),
# layer 4-7
MLFNBlock(
channels[1], channels[2], 2, [256, 128], self.groups
),
MLFNBlock(
channels[2], channels[2], 1, [256, 128], self.groups
),
MLFNBlock(
channels[2], channels[2], 1, [256, 128], self.groups
),
MLFNBlock(
channels[2], channels[2], 1, [256, 128], self.groups
),
# layer 8-13
MLFNBlock(
channels[2], channels[3], 2, [512, 128], self.groups
),
MLFNBlock(
channels[3], channels[3], 1, [512, 128], self.groups
),
MLFNBlock(
channels[3], channels[3], 1, [512, 128], self.groups
),
MLFNBlock(
channels[3], channels[3], 1, [512, 128], self.groups
),
MLFNBlock(
channels[3], channels[3], 1, [512, 128], self.groups
),
MLFNBlock(
channels[3], channels[3], 1, [512, 128], self.groups
),
# layer 14-16
MLFNBlock(
channels[3], channels[4], 2, [512, 128], self.groups
),
MLFNBlock(
channels[4], channels[4], 1, [512, 128], self.groups
),
MLFNBlock(
channels[4], channels[4], 1, [512, 128], self.groups
),
]
)
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
# projection functions
self.fc_x = nn.Sequential(
nn.Conv2d(channels[4], embed_dim, 1, bias=False),
nn.BatchNorm2d(embed_dim),
nn.ReLU(inplace=True),
)
self.fc_s = nn.Sequential(
nn.Conv2d(self.groups * 16, embed_dim, 1, bias=False),
nn.BatchNorm2d(embed_dim),
nn.ReLU(inplace=True),
)
self.classifier = nn.Linear(embed_dim, num_classes)
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = F.relu(x, inplace=True)
x = self.maxpool(x)
s_hat = []
for block in self.feature:
x, s = block(x)
s_hat.append(s)
s_hat = torch.cat(s_hat, 1)
x = self.global_avgpool(x)
x = self.fc_x(x)
s_hat = self.fc_s(s_hat)
v = (x+s_hat) * 0.5
v = v.view(v.size(0), -1)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError('Unsupported loss: {}'.format(self.loss))
def init_pretrained_weights(model, model_url):
"""Initializes model with pretrained weights.
Layers that don't match the pretrained layers in name or size are kept unchanged.
"""
pretrain_dict = model_zoo.load_url(model_url)
model_dict = model.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
model.load_state_dict(model_dict)
def mlfn(num_classes, loss='softmax', pretrained=True, **kwargs):
model = MLFN(num_classes, loss, **kwargs)
if pretrained:
# init_pretrained_weights(model, model_urls['imagenet'])
import warnings
warnings.warn(
'The imagenet pretrained weights need to be manually downloaded from {}'
.format(model_urls['imagenet'])
)
return model
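
The repeat/view/permute sequence in MLFNBlock.forward turns the (b, groups, 1, 1) gate s into a (b, c, 1, 1) gate in which each group's scalar covers c // groups consecutive channels; a toy check (illustrative only):

import torch

b, groups, n = 1, 4, 3  # n = channels per group
c = groups * n
s = torch.arange(1., groups + 1).view(b, groups, 1, 1)  # gates [1, 2, 3, 4]

ss = s.repeat(1, n, 1, 1)  # (b, g*n, 1, 1), tiled group-minor
ss = ss.view(b, n, groups, 1, 1)
ss = ss.permute(0, 2, 1, 3, 4).contiguous()
ss = ss.view(b, c, 1, 1)   # group-major: each gate now spans n channels
print(ss.flatten().tolist())
# [1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0]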

View file

@@ -0,0 +1,274 @@
from __future__ import division, absolute_import
import torch.utils.model_zoo as model_zoo
from torch import nn
from torch.nn import functional as F
__all__ = ['mobilenetv2_x1_0', 'mobilenetv2_x1_4']
model_urls = {
# 1.0: top-1 71.3
'mobilenetv2_x1_0':
'https://mega.nz/#!NKp2wAIA!1NH1pbNzY_M2hVk_hdsxNM1NUOWvvGPHhaNr-fASF6c',
# 1.4: top-1 73.9
'mobilenetv2_x1_4':
'https://mega.nz/#!RGhgEIwS!xN2s2ZdyqI6vQ3EwgmRXLEW3khr9tpXg96G9SUJugGk',
}
class ConvBlock(nn.Module):
"""Basic convolutional block.
convolution (bias discarded) + batch normalization + relu6.
Args:
in_c (int): number of input channels.
out_c (int): number of output channels.
k (int or tuple): kernel size.
s (int or tuple): stride.
p (int or tuple): padding.
g (int): number of blocked connections from input channels
to output channels (default: 1).
"""
def __init__(self, in_c, out_c, k, s=1, p=0, g=1):
super(ConvBlock, self).__init__()
self.conv = nn.Conv2d(
in_c, out_c, k, stride=s, padding=p, bias=False, groups=g
)
self.bn = nn.BatchNorm2d(out_c)
def forward(self, x):
return F.relu6(self.bn(self.conv(x)))
class Bottleneck(nn.Module):
def __init__(self, in_channels, out_channels, expansion_factor, stride=1):
super(Bottleneck, self).__init__()
mid_channels = in_channels * expansion_factor
self.use_residual = stride == 1 and in_channels == out_channels
self.conv1 = ConvBlock(in_channels, mid_channels, 1)
self.dwconv2 = ConvBlock(
mid_channels, mid_channels, 3, stride, 1, g=mid_channels
)
self.conv3 = nn.Sequential(
nn.Conv2d(mid_channels, out_channels, 1, bias=False),
nn.BatchNorm2d(out_channels),
)
def forward(self, x):
m = self.conv1(x)
m = self.dwconv2(m)
m = self.conv3(m)
if self.use_residual:
return x + m
else:
return m
class MobileNetV2(nn.Module):
"""MobileNetV2.
Reference:
Sandler et al. MobileNetV2: Inverted Residuals and
Linear Bottlenecks. CVPR 2018.
Public keys:
- ``mobilenetv2_x1_0``: MobileNetV2 x1.0.
- ``mobilenetv2_x1_4``: MobileNetV2 x1.4.
"""
def __init__(
self,
num_classes,
width_mult=1,
loss='softmax',
fc_dims=None,
dropout_p=None,
**kwargs
):
super(MobileNetV2, self).__init__()
self.loss = loss
self.in_channels = int(32 * width_mult)
self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280
# construct layers
self.conv1 = ConvBlock(3, self.in_channels, 3, s=2, p=1)
self.conv2 = self._make_layer(
Bottleneck, 1, int(16 * width_mult), 1, 1
)
self.conv3 = self._make_layer(
Bottleneck, 6, int(24 * width_mult), 2, 2
)
self.conv4 = self._make_layer(
Bottleneck, 6, int(32 * width_mult), 3, 2
)
self.conv5 = self._make_layer(
Bottleneck, 6, int(64 * width_mult), 4, 2
)
self.conv6 = self._make_layer(
Bottleneck, 6, int(96 * width_mult), 3, 1
)
self.conv7 = self._make_layer(
Bottleneck, 6, int(160 * width_mult), 3, 2
)
self.conv8 = self._make_layer(
Bottleneck, 6, int(320 * width_mult), 1, 1
)
self.conv9 = ConvBlock(self.in_channels, self.feature_dim, 1)
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.fc = self._construct_fc_layer(
fc_dims, self.feature_dim, dropout_p
)
self.classifier = nn.Linear(self.feature_dim, num_classes)
self._init_params()
def _make_layer(self, block, t, c, n, s):
# t: expansion factor
# c: output channels
# n: number of blocks
# s: stride for first layer
layers = []
layers.append(block(self.in_channels, c, t, s))
self.in_channels = c
for i in range(1, n):
layers.append(block(self.in_channels, c, t))
return nn.Sequential(*layers)
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
"""Constructs fully connected layer.
Args:
fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
input_dim (int): input dimension
dropout_p (float): dropout probability, if None, dropout is unused
"""
if fc_dims is None:
self.feature_dim = input_dim
return None
assert isinstance(
fc_dims, (list, tuple)
), 'fc_dims must be either list or tuple, but got {}'.format(
type(fc_dims)
)
layers = []
for dim in fc_dims:
layers.append(nn.Linear(input_dim, dim))
layers.append(nn.BatchNorm1d(dim))
layers.append(nn.ReLU(inplace=True))
if dropout_p is not None:
layers.append(nn.Dropout(p=dropout_p))
input_dim = dim
self.feature_dim = fc_dims[-1]
return nn.Sequential(*layers)
def _init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def featuremaps(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.conv5(x)
x = self.conv6(x)
x = self.conv7(x)
x = self.conv8(x)
x = self.conv9(x)
return x
def forward(self, x):
f = self.featuremaps(x)
v = self.global_avgpool(f)
v = v.view(v.size(0), -1)
if self.fc is not None:
v = self.fc(v)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
def init_pretrained_weights(model, model_url):
"""Initializes model with pretrained weights.
Layers that don't match the pretrained layers in name or size are kept unchanged.
"""
pretrain_dict = model_zoo.load_url(model_url)
model_dict = model.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
model.load_state_dict(model_dict)
def mobilenetv2_x1_0(num_classes, loss, pretrained=True, **kwargs):
model = MobileNetV2(
num_classes,
loss=loss,
width_mult=1,
fc_dims=None,
dropout_p=None,
**kwargs
)
if pretrained:
# init_pretrained_weights(model, model_urls['mobilenetv2_x1_0'])
import warnings
warnings.warn(
'The imagenet pretrained weights need to be manually downloaded from {}'
.format(model_urls['mobilenetv2_x1_0'])
)
return model
def mobilenetv2_x1_4(num_classes, loss, pretrained=True, **kwargs):
model = MobileNetV2(
num_classes,
loss=loss,
width_mult=1.4,
fc_dims=None,
dropout_p=None,
**kwargs
)
if pretrained:
# init_pretrained_weights(model, model_urls['mobilenetv2_x1_4'])
import warnings
warnings.warn(
'The imagenet pretrained weights need to be manually downloaded from {}'
.format(model_urls['mobilenetv2_x1_4'])
)
return model
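
A small check of the inverted-residual rule above (illustrative, using the Bottleneck class from this file): the skip connection is kept only when the stride is 1 and the channel count is unchanged.

import torch

same = Bottleneck(in_channels=32, out_channels=32, expansion_factor=6, stride=1)
down = Bottleneck(in_channels=32, out_channels=64, expansion_factor=6, stride=2)
print(same.use_residual, down.use_residual)  # True False

x = torch.randn(2, 32, 16, 16)
print(same(x).shape)  # torch.Size([2, 32, 16, 16])
print(down(x).shape)  # torch.Size([2, 64, 8, 8])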

View file

@@ -0,0 +1,206 @@
from __future__ import division, absolute_import
import torch
from torch import nn
from torch.nn import functional as F
__all__ = ['MuDeep']
class ConvBlock(nn.Module):
"""Basic convolutional block.
convolution + batch normalization + relu.
Args:
in_c (int): number of input channels.
out_c (int): number of output channels.
k (int or tuple): kernel size.
s (int or tuple): stride.
p (int or tuple): padding.
"""
def __init__(self, in_c, out_c, k, s, p):
super(ConvBlock, self).__init__()
self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p)
self.bn = nn.BatchNorm2d(out_c)
def forward(self, x):
return F.relu(self.bn(self.conv(x)))
class ConvLayers(nn.Module):
"""Preprocessing layers."""
def __init__(self):
super(ConvLayers, self).__init__()
self.conv1 = ConvBlock(3, 48, k=3, s=1, p=1)
self.conv2 = ConvBlock(48, 96, k=3, s=1, p=1)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.maxpool(x)
return x
class MultiScaleA(nn.Module):
"""Multi-scale stream layer A (Sec.3.1)"""
def __init__(self):
super(MultiScaleA, self).__init__()
self.stream1 = nn.Sequential(
ConvBlock(96, 96, k=1, s=1, p=0),
ConvBlock(96, 24, k=3, s=1, p=1),
)
self.stream2 = nn.Sequential(
nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
ConvBlock(96, 24, k=1, s=1, p=0),
)
self.stream3 = ConvBlock(96, 24, k=1, s=1, p=0)
self.stream4 = nn.Sequential(
ConvBlock(96, 16, k=1, s=1, p=0),
ConvBlock(16, 24, k=3, s=1, p=1),
ConvBlock(24, 24, k=3, s=1, p=1),
)
def forward(self, x):
s1 = self.stream1(x)
s2 = self.stream2(x)
s3 = self.stream3(x)
s4 = self.stream4(x)
y = torch.cat([s1, s2, s3, s4], dim=1)
return y
class Reduction(nn.Module):
"""Reduction layer (Sec.3.1)"""
def __init__(self):
super(Reduction, self).__init__()
self.stream1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.stream2 = ConvBlock(96, 96, k=3, s=2, p=1)
self.stream3 = nn.Sequential(
ConvBlock(96, 48, k=1, s=1, p=0),
ConvBlock(48, 56, k=3, s=1, p=1),
ConvBlock(56, 64, k=3, s=2, p=1),
)
def forward(self, x):
s1 = self.stream1(x)
s2 = self.stream2(x)
s3 = self.stream3(x)
y = torch.cat([s1, s2, s3], dim=1)
return y
class MultiScaleB(nn.Module):
"""Multi-scale stream layer B (Sec.3.1)"""
def __init__(self):
super(MultiScaleB, self).__init__()
self.stream1 = nn.Sequential(
nn.AvgPool2d(kernel_size=3, stride=1, padding=1),
ConvBlock(256, 256, k=1, s=1, p=0),
)
self.stream2 = nn.Sequential(
ConvBlock(256, 64, k=1, s=1, p=0),
ConvBlock(64, 128, k=(1, 3), s=1, p=(0, 1)),
ConvBlock(128, 256, k=(3, 1), s=1, p=(1, 0)),
)
self.stream3 = ConvBlock(256, 256, k=1, s=1, p=0)
self.stream4 = nn.Sequential(
ConvBlock(256, 64, k=1, s=1, p=0),
ConvBlock(64, 64, k=(1, 3), s=1, p=(0, 1)),
ConvBlock(64, 128, k=(3, 1), s=1, p=(1, 0)),
ConvBlock(128, 128, k=(1, 3), s=1, p=(0, 1)),
ConvBlock(128, 256, k=(3, 1), s=1, p=(1, 0)),
)
def forward(self, x):
s1 = self.stream1(x)
s2 = self.stream2(x)
s3 = self.stream3(x)
s4 = self.stream4(x)
return s1, s2, s3, s4
class Fusion(nn.Module):
"""Saliency-based learning fusion layer (Sec.3.2)"""
def __init__(self):
super(Fusion, self).__init__()
self.a1 = nn.Parameter(torch.rand(1, 256, 1, 1))
self.a2 = nn.Parameter(torch.rand(1, 256, 1, 1))
self.a3 = nn.Parameter(torch.rand(1, 256, 1, 1))
self.a4 = nn.Parameter(torch.rand(1, 256, 1, 1))
# We add an average pooling layer to reduce the spatial dimension
# of feature maps, which differs from the original paper.
self.avgpool = nn.AvgPool2d(kernel_size=4, stride=4, padding=0)
def forward(self, x1, x2, x3, x4):
s1 = self.a1.expand_as(x1) * x1
s2 = self.a2.expand_as(x2) * x2
s3 = self.a3.expand_as(x3) * x3
s4 = self.a4.expand_as(x4) * x4
y = self.avgpool(s1 + s2 + s3 + s4)
return y
class MuDeep(nn.Module):
"""Multiscale deep neural network.
Reference:
Qian et al. Multi-scale Deep Learning Architectures
for Person Re-identification. ICCV 2017.
Public keys:
- ``mudeep``: Multiscale deep neural network.
"""
def __init__(self, num_classes, loss='softmax', **kwargs):
super(MuDeep, self).__init__()
self.loss = loss
self.block1 = ConvLayers()
self.block2 = MultiScaleA()
self.block3 = Reduction()
self.block4 = MultiScaleB()
self.block5 = Fusion()
# Because of this fully connected layer, the input image shape has to
# be fixed at (3, 256, 128) so that the last convolutional feature
# maps are of shape (256, 16, 8). If the input shape is changed,
# the input dimension of this layer has to be changed accordingly.
self.fc = nn.Sequential(
nn.Linear(256 * 16 * 8, 4096),
nn.BatchNorm1d(4096),
nn.ReLU(),
)
self.classifier = nn.Linear(4096, num_classes)
self.feat_dim = 4096
def featuremaps(self, x):
x = self.block1(x)
x = self.block2(x)
x = self.block3(x)
x = self.block4(x)
x = self.block5(*x)
return x
def forward(self, x):
x = self.featuremaps(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
y = self.classifier(x)
if not self.training:
return x
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, x
else:
raise KeyError('Unsupported loss: {}'.format(self.loss))
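A minimal usage sketch for the fixed-input-size constraint noted in MuDeep.__init__; the torchreid.models.mudeep import path is an assumption about where this file lands in the package.

# Hypothetical usage sketch: MuDeep only accepts 256x128 inputs because of
# the nn.Linear(256 * 16 * 8, 4096) head defined above.
import torch
from torchreid.models.mudeep import MuDeep  # assumed import path

model = MuDeep(num_classes=751, loss='softmax')
model.eval()
x = torch.randn(4, 3, 256, 128)  # (batch, channels, height, width)
with torch.no_grad():
    feats = model(x)             # eval mode returns the 4096-dim fc features
print(feats.shape)               # torch.Size([4, 4096])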

File diff suppressed because it is too large

View file

@@ -0,0 +1,598 @@
from __future__ import division, absolute_import
import warnings
import torch
from torch import nn
from torch.nn import functional as F
__all__ = [
'osnet_x1_0', 'osnet_x0_75', 'osnet_x0_5', 'osnet_x0_25', 'osnet_ibn_x1_0'
]
pretrained_urls = {
'osnet_x1_0':
'https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY',
'osnet_x0_75':
'https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq',
'osnet_x0_5':
'https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i',
'osnet_x0_25':
'https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs',
'osnet_ibn_x1_0':
'https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l'
}
##########
# Basic layers
##########
class ConvLayer(nn.Module):
"""Convolution layer (conv + bn + relu)."""
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
IN=False
):
super(ConvLayer, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
groups=groups
)
if IN:
self.bn = nn.InstanceNorm2d(out_channels, affine=True)
else:
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Conv1x1(nn.Module):
"""1x1 convolution + bn + relu."""
def __init__(self, in_channels, out_channels, stride=1, groups=1):
super(Conv1x1, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
1,
stride=stride,
padding=0,
bias=False,
groups=groups
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class Conv1x1Linear(nn.Module):
"""1x1 convolution + bn (w/o non-linearity)."""
def __init__(self, in_channels, out_channels, stride=1):
super(Conv1x1Linear, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, 1, stride=stride, padding=0, bias=False
)
self.bn = nn.BatchNorm2d(out_channels)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return x
class Conv3x3(nn.Module):
"""3x3 convolution + bn + relu."""
def __init__(self, in_channels, out_channels, stride=1, groups=1):
super(Conv3x3, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
3,
stride=stride,
padding=1,
bias=False,
groups=groups
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class LightConv3x3(nn.Module):
"""Lightweight 3x3 convolution.
1x1 (linear) + dw 3x3 (nonlinear).
"""
def __init__(self, in_channels, out_channels):
super(LightConv3x3, self).__init__()
self.conv1 = nn.Conv2d(
in_channels, out_channels, 1, stride=1, padding=0, bias=False
)
self.conv2 = nn.Conv2d(
out_channels,
out_channels,
3,
stride=1,
padding=1,
bias=False,
groups=out_channels
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.bn(x)
x = self.relu(x)
return x
##########
# Building blocks for omni-scale feature learning
##########
class ChannelGate(nn.Module):
"""A mini-network that generates channel-wise gates conditioned on input tensor."""
def __init__(
self,
in_channels,
num_gates=None,
return_gates=False,
gate_activation='sigmoid',
reduction=16,
layer_norm=False
):
super(ChannelGate, self).__init__()
if num_gates is None:
num_gates = in_channels
self.return_gates = return_gates
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.fc1 = nn.Conv2d(
in_channels,
in_channels // reduction,
kernel_size=1,
bias=True,
padding=0
)
self.norm1 = None
if layer_norm:
self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Conv2d(
in_channels // reduction,
num_gates,
kernel_size=1,
bias=True,
padding=0
)
if gate_activation == 'sigmoid':
self.gate_activation = nn.Sigmoid()
elif gate_activation == 'relu':
self.gate_activation = nn.ReLU(inplace=True)
elif gate_activation == 'linear':
self.gate_activation = None
else:
raise RuntimeError(
"Unknown gate activation: {}".format(gate_activation)
)
def forward(self, x):
input = x
x = self.global_avgpool(x)
x = self.fc1(x)
if self.norm1 is not None:
x = self.norm1(x)
x = self.relu(x)
x = self.fc2(x)
if self.gate_activation is not None:
x = self.gate_activation(x)
if self.return_gates:
return x
return input * x
class OSBlock(nn.Module):
"""Omni-scale feature learning block."""
def __init__(
self,
in_channels,
out_channels,
IN=False,
bottleneck_reduction=4,
**kwargs
):
super(OSBlock, self).__init__()
mid_channels = out_channels // bottleneck_reduction
self.conv1 = Conv1x1(in_channels, mid_channels)
self.conv2a = LightConv3x3(mid_channels, mid_channels)
self.conv2b = nn.Sequential(
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
)
self.conv2c = nn.Sequential(
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
)
self.conv2d = nn.Sequential(
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
LightConv3x3(mid_channels, mid_channels),
)
self.gate = ChannelGate(mid_channels)
self.conv3 = Conv1x1Linear(mid_channels, out_channels)
self.downsample = None
if in_channels != out_channels:
self.downsample = Conv1x1Linear(in_channels, out_channels)
self.IN = None
if IN:
self.IN = nn.InstanceNorm2d(out_channels, affine=True)
def forward(self, x):
identity = x
x1 = self.conv1(x)
x2a = self.conv2a(x1)
x2b = self.conv2b(x1)
x2c = self.conv2c(x1)
x2d = self.conv2d(x1)
x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
x3 = self.conv3(x2)
if self.downsample is not None:
identity = self.downsample(identity)
out = x3 + identity
if self.IN is not None:
out = self.IN(out)
return F.relu(out)
##########
# Network architecture
##########
class OSNet(nn.Module):
"""Omni-Scale Network.
Reference:
- Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.
- Zhou et al. Learning Generalisable Omni-Scale Representations
for Person Re-Identification. TPAMI, 2021.
"""
def __init__(
self,
num_classes,
blocks,
layers,
channels,
feature_dim=512,
loss='softmax',
IN=False,
**kwargs
):
super(OSNet, self).__init__()
num_blocks = len(blocks)
assert num_blocks == len(layers)
assert num_blocks == len(channels) - 1
self.loss = loss
self.feature_dim = feature_dim
# convolutional backbone
self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)
self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
self.conv2 = self._make_layer(
blocks[0],
layers[0],
channels[0],
channels[1],
reduce_spatial_size=True,
IN=IN
)
self.conv3 = self._make_layer(
blocks[1],
layers[1],
channels[1],
channels[2],
reduce_spatial_size=True
)
self.conv4 = self._make_layer(
blocks[2],
layers[2],
channels[2],
channels[3],
reduce_spatial_size=False
)
self.conv5 = Conv1x1(channels[3], channels[3])
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
# fully connected layer
self.fc = self._construct_fc_layer(
self.feature_dim, channels[3], dropout_p=None
)
# identity classification layer
self.classifier = nn.Linear(self.feature_dim, num_classes)
self._init_params()
def _make_layer(
self,
block,
layer,
in_channels,
out_channels,
reduce_spatial_size,
IN=False
):
layers = []
layers.append(block(in_channels, out_channels, IN=IN))
for i in range(1, layer):
layers.append(block(out_channels, out_channels, IN=IN))
if reduce_spatial_size:
layers.append(
nn.Sequential(
Conv1x1(out_channels, out_channels),
nn.AvgPool2d(2, stride=2)
)
)
return nn.Sequential(*layers)
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
if fc_dims is None or fc_dims < 0:
self.feature_dim = input_dim
return None
if isinstance(fc_dims, int):
fc_dims = [fc_dims]
layers = []
for dim in fc_dims:
layers.append(nn.Linear(input_dim, dim))
layers.append(nn.BatchNorm1d(dim))
layers.append(nn.ReLU(inplace=True))
if dropout_p is not None:
layers.append(nn.Dropout(p=dropout_p))
input_dim = dim
self.feature_dim = fc_dims[-1]
return nn.Sequential(*layers)
def _init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def featuremaps(self, x):
x = self.conv1(x)
x = self.maxpool(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.conv5(x)
return x
def forward(self, x, return_featuremaps=False):
x = self.featuremaps(x)
if return_featuremaps:
return x
v = self.global_avgpool(x)
v = v.view(v.size(0), -1)
if self.fc is not None:
v = self.fc(v)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
def init_pretrained_weights(model, key=''):
"""Initializes model with pretrained weights.
    Layers whose names or sizes do not match the pretrained checkpoint are left unchanged.
"""
import os
import errno
import gdown
from collections import OrderedDict
def _get_torch_home():
ENV_TORCH_HOME = 'TORCH_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
torch_home = os.path.expanduser(
os.getenv(
ENV_TORCH_HOME,
os.path.join(
os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
)
)
)
return torch_home
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
try:
os.makedirs(model_dir)
except OSError as e:
if e.errno == errno.EEXIST:
# Directory already exists, ignore.
pass
else:
# Unexpected OSError, re-raise.
raise
filename = key + '_imagenet.pth'
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
gdown.download(pretrained_urls[key], cached_file, quiet=False)
state_dict = torch.load(cached_file)
model_dict = model.state_dict()
new_state_dict = OrderedDict()
matched_layers, discarded_layers = [], []
for k, v in state_dict.items():
if k.startswith('module.'):
k = k[7:] # discard module.
if k in model_dict and model_dict[k].size() == v.size():
new_state_dict[k] = v
matched_layers.append(k)
else:
discarded_layers.append(k)
model_dict.update(new_state_dict)
model.load_state_dict(model_dict)
if len(matched_layers) == 0:
warnings.warn(
'The pretrained weights from "{}" cannot be loaded, '
'please check the key names manually '
'(** ignored and continue **)'.format(cached_file)
)
else:
print(
'Successfully loaded imagenet pretrained weights from "{}"'.
format(cached_file)
)
if len(discarded_layers) > 0:
print(
'** The following layers are discarded '
'due to unmatched keys or layer size: {}'.
format(discarded_layers)
)
##########
# Instantiation
##########
def osnet_x1_0(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# standard size (width x1.0)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[64, 256, 384, 512],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x1_0')
return model
def osnet_x0_75(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# medium size (width x0.75)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[48, 192, 288, 384],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x0_75')
return model
def osnet_x0_5(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# tiny size (width x0.5)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[32, 128, 192, 256],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x0_5')
return model
def osnet_x0_25(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
# very tiny size (width x0.25)
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[16, 64, 96, 128],
loss=loss,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_x0_25')
return model
def osnet_ibn_x1_0(
num_classes=1000, pretrained=True, loss='softmax', **kwargs
):
# standard size (width x1.0) + IBN layer
# Ref: Pan et al. Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net. ECCV, 2018.
model = OSNet(
num_classes,
blocks=[OSBlock, OSBlock, OSBlock],
layers=[2, 2, 2],
channels=[64, 256, 384, 512],
loss=loss,
IN=True,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_ibn_x1_0')
return model
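A short sketch of building and running this OSNet through the factory; pretrained=False avoids the gdown download, and the torchreid import path is an assumption about the package layout.

# Sketch only: construct osnet_x1_0 without downloading weights.
import torch
from torchreid import models  # assumed import path

net = models.build_model('osnet_x1_0', num_classes=1000, pretrained=False)
net.eval()
x = torch.randn(2, 3, 256, 128)
with torch.no_grad():
    v = net(x)                               # eval mode: 512-dim embeddings
    fmaps = net(x, return_featuremaps=True)  # last conv maps before pooling
print(v.shape, fmaps.shape)                  # [2, 512] and [2, 512, 16, 8]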

View file

@@ -0,0 +1,609 @@
from __future__ import division, absolute_import
import warnings
import torch
from torch import nn
from torch.nn import functional as F
__all__ = [
'osnet_ain_x1_0', 'osnet_ain_x0_75', 'osnet_ain_x0_5', 'osnet_ain_x0_25'
]
pretrained_urls = {
'osnet_ain_x1_0':
'https://drive.google.com/uc?id=1-CaioD9NaqbHK_kzSMW8VE4_3KcsRjEo',
'osnet_ain_x0_75':
'https://drive.google.com/uc?id=1apy0hpsMypqstfencdH-jKIUEFOW4xoM',
'osnet_ain_x0_5':
'https://drive.google.com/uc?id=1KusKvEYyKGDTUBVRxRiz55G31wkihB6l',
'osnet_ain_x0_25':
'https://drive.google.com/uc?id=1SxQt2AvmEcgWNhaRb2xC4rP6ZwVDP0Wt'
}
##########
# Basic layers
##########
class ConvLayer(nn.Module):
"""Convolution layer (conv + bn + relu)."""
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
groups=1,
IN=False
):
super(ConvLayer, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
groups=groups
)
if IN:
self.bn = nn.InstanceNorm2d(out_channels, affine=True)
else:
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return self.relu(x)
class Conv1x1(nn.Module):
"""1x1 convolution + bn + relu."""
def __init__(self, in_channels, out_channels, stride=1, groups=1):
super(Conv1x1, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
1,
stride=stride,
padding=0,
bias=False,
groups=groups
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return self.relu(x)
class Conv1x1Linear(nn.Module):
"""1x1 convolution + bn (w/o non-linearity)."""
def __init__(self, in_channels, out_channels, stride=1, bn=True):
super(Conv1x1Linear, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, 1, stride=stride, padding=0, bias=False
)
self.bn = None
if bn:
self.bn = nn.BatchNorm2d(out_channels)
def forward(self, x):
x = self.conv(x)
if self.bn is not None:
x = self.bn(x)
return x
class Conv3x3(nn.Module):
"""3x3 convolution + bn + relu."""
def __init__(self, in_channels, out_channels, stride=1, groups=1):
super(Conv3x3, self).__init__()
self.conv = nn.Conv2d(
in_channels,
out_channels,
3,
stride=stride,
padding=1,
bias=False,
groups=groups
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return self.relu(x)
class LightConv3x3(nn.Module):
"""Lightweight 3x3 convolution.
1x1 (linear) + dw 3x3 (nonlinear).
"""
def __init__(self, in_channels, out_channels):
super(LightConv3x3, self).__init__()
self.conv1 = nn.Conv2d(
in_channels, out_channels, 1, stride=1, padding=0, bias=False
)
self.conv2 = nn.Conv2d(
out_channels,
out_channels,
3,
stride=1,
padding=1,
bias=False,
groups=out_channels
)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU()
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.bn(x)
return self.relu(x)
class LightConvStream(nn.Module):
"""Lightweight convolution stream."""
def __init__(self, in_channels, out_channels, depth):
super(LightConvStream, self).__init__()
assert depth >= 1, 'depth must be equal to or larger than 1, but got {}'.format(
depth
)
layers = []
layers += [LightConv3x3(in_channels, out_channels)]
for i in range(depth - 1):
layers += [LightConv3x3(out_channels, out_channels)]
self.layers = nn.Sequential(*layers)
def forward(self, x):
return self.layers(x)
##########
# Building blocks for omni-scale feature learning
##########
class ChannelGate(nn.Module):
"""A mini-network that generates channel-wise gates conditioned on input tensor."""
def __init__(
self,
in_channels,
num_gates=None,
return_gates=False,
gate_activation='sigmoid',
reduction=16,
layer_norm=False
):
super(ChannelGate, self).__init__()
if num_gates is None:
num_gates = in_channels
self.return_gates = return_gates
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.fc1 = nn.Conv2d(
in_channels,
in_channels // reduction,
kernel_size=1,
bias=True,
padding=0
)
self.norm1 = None
if layer_norm:
self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
self.relu = nn.ReLU()
self.fc2 = nn.Conv2d(
in_channels // reduction,
num_gates,
kernel_size=1,
bias=True,
padding=0
)
if gate_activation == 'sigmoid':
self.gate_activation = nn.Sigmoid()
elif gate_activation == 'relu':
self.gate_activation = nn.ReLU()
elif gate_activation == 'linear':
self.gate_activation = None
else:
raise RuntimeError(
"Unknown gate activation: {}".format(gate_activation)
)
def forward(self, x):
input = x
x = self.global_avgpool(x)
x = self.fc1(x)
if self.norm1 is not None:
x = self.norm1(x)
x = self.relu(x)
x = self.fc2(x)
if self.gate_activation is not None:
x = self.gate_activation(x)
if self.return_gates:
return x
return input * x
class OSBlock(nn.Module):
"""Omni-scale feature learning block."""
def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
super(OSBlock, self).__init__()
assert T >= 1
assert out_channels >= reduction and out_channels % reduction == 0
mid_channels = out_channels // reduction
self.conv1 = Conv1x1(in_channels, mid_channels)
self.conv2 = nn.ModuleList()
for t in range(1, T + 1):
self.conv2 += [LightConvStream(mid_channels, mid_channels, t)]
self.gate = ChannelGate(mid_channels)
self.conv3 = Conv1x1Linear(mid_channels, out_channels)
self.downsample = None
if in_channels != out_channels:
self.downsample = Conv1x1Linear(in_channels, out_channels)
def forward(self, x):
identity = x
x1 = self.conv1(x)
x2 = 0
for conv2_t in self.conv2:
x2_t = conv2_t(x1)
x2 = x2 + self.gate(x2_t)
x3 = self.conv3(x2)
if self.downsample is not None:
identity = self.downsample(identity)
out = x3 + identity
return F.relu(out)
class OSBlockINin(nn.Module):
"""Omni-scale feature learning block with instance normalization."""
def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
super(OSBlockINin, self).__init__()
assert T >= 1
assert out_channels >= reduction and out_channels % reduction == 0
mid_channels = out_channels // reduction
self.conv1 = Conv1x1(in_channels, mid_channels)
self.conv2 = nn.ModuleList()
for t in range(1, T + 1):
self.conv2 += [LightConvStream(mid_channels, mid_channels, t)]
self.gate = ChannelGate(mid_channels)
self.conv3 = Conv1x1Linear(mid_channels, out_channels, bn=False)
self.downsample = None
if in_channels != out_channels:
self.downsample = Conv1x1Linear(in_channels, out_channels)
self.IN = nn.InstanceNorm2d(out_channels, affine=True)
def forward(self, x):
identity = x
x1 = self.conv1(x)
x2 = 0
for conv2_t in self.conv2:
x2_t = conv2_t(x1)
x2 = x2 + self.gate(x2_t)
x3 = self.conv3(x2)
x3 = self.IN(x3) # IN inside residual
if self.downsample is not None:
identity = self.downsample(identity)
out = x3 + identity
return F.relu(out)
##########
# Network architecture
##########
class OSNet(nn.Module):
"""Omni-Scale Network.
Reference:
- Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.
- Zhou et al. Learning Generalisable Omni-Scale Representations
for Person Re-Identification. TPAMI, 2021.
"""
def __init__(
self,
num_classes,
blocks,
layers,
channels,
feature_dim=512,
loss='softmax',
conv1_IN=False,
**kwargs
):
super(OSNet, self).__init__()
num_blocks = len(blocks)
assert num_blocks == len(layers)
assert num_blocks == len(channels) - 1
self.loss = loss
self.feature_dim = feature_dim
# convolutional backbone
self.conv1 = ConvLayer(
3, channels[0], 7, stride=2, padding=3, IN=conv1_IN
)
self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
self.conv2 = self._make_layer(
blocks[0], layers[0], channels[0], channels[1]
)
self.pool2 = nn.Sequential(
Conv1x1(channels[1], channels[1]), nn.AvgPool2d(2, stride=2)
)
self.conv3 = self._make_layer(
blocks[1], layers[1], channels[1], channels[2]
)
self.pool3 = nn.Sequential(
Conv1x1(channels[2], channels[2]), nn.AvgPool2d(2, stride=2)
)
self.conv4 = self._make_layer(
blocks[2], layers[2], channels[2], channels[3]
)
self.conv5 = Conv1x1(channels[3], channels[3])
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
# fully connected layer
self.fc = self._construct_fc_layer(
self.feature_dim, channels[3], dropout_p=None
)
# identity classification layer
self.classifier = nn.Linear(self.feature_dim, num_classes)
self._init_params()
def _make_layer(self, blocks, layer, in_channels, out_channels):
layers = []
layers += [blocks[0](in_channels, out_channels)]
for i in range(1, len(blocks)):
layers += [blocks[i](out_channels, out_channels)]
return nn.Sequential(*layers)
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
if fc_dims is None or fc_dims < 0:
self.feature_dim = input_dim
return None
if isinstance(fc_dims, int):
fc_dims = [fc_dims]
layers = []
for dim in fc_dims:
layers.append(nn.Linear(input_dim, dim))
layers.append(nn.BatchNorm1d(dim))
layers.append(nn.ReLU())
if dropout_p is not None:
layers.append(nn.Dropout(p=dropout_p))
input_dim = dim
self.feature_dim = fc_dims[-1]
return nn.Sequential(*layers)
def _init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.InstanceNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def featuremaps(self, x):
x = self.conv1(x)
x = self.maxpool(x)
x = self.conv2(x)
x = self.pool2(x)
x = self.conv3(x)
x = self.pool3(x)
x = self.conv4(x)
x = self.conv5(x)
return x
def forward(self, x, return_featuremaps=False):
x = self.featuremaps(x)
if return_featuremaps:
return x
v = self.global_avgpool(x)
v = v.view(v.size(0), -1)
if self.fc is not None:
v = self.fc(v)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
def init_pretrained_weights(model, key=''):
"""Initializes model with pretrained weights.
    Layers whose names or sizes do not match the pretrained checkpoint are left unchanged.
"""
import os
import errno
import gdown
from collections import OrderedDict
def _get_torch_home():
ENV_TORCH_HOME = 'TORCH_HOME'
ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
DEFAULT_CACHE_DIR = '~/.cache'
torch_home = os.path.expanduser(
os.getenv(
ENV_TORCH_HOME,
os.path.join(
os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
)
)
)
return torch_home
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
try:
os.makedirs(model_dir)
except OSError as e:
if e.errno == errno.EEXIST:
# Directory already exists, ignore.
pass
else:
# Unexpected OSError, re-raise.
raise
filename = key + '_imagenet.pth'
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
gdown.download(pretrained_urls[key], cached_file, quiet=False)
state_dict = torch.load(cached_file)
model_dict = model.state_dict()
new_state_dict = OrderedDict()
matched_layers, discarded_layers = [], []
for k, v in state_dict.items():
if k.startswith('module.'):
k = k[7:] # discard module.
if k in model_dict and model_dict[k].size() == v.size():
new_state_dict[k] = v
matched_layers.append(k)
else:
discarded_layers.append(k)
model_dict.update(new_state_dict)
model.load_state_dict(model_dict)
if len(matched_layers) == 0:
warnings.warn(
'The pretrained weights from "{}" cannot be loaded, '
'please check the key names manually '
'(** ignored and continue **)'.format(cached_file)
)
else:
print(
'Successfully loaded imagenet pretrained weights from "{}"'.
format(cached_file)
)
if len(discarded_layers) > 0:
print(
'** The following layers are discarded '
'due to unmatched keys or layer size: {}'.
format(discarded_layers)
)
##########
# Instantiation
##########
def osnet_ain_x1_0(
num_classes=1000, pretrained=True, loss='softmax', **kwargs
):
model = OSNet(
num_classes,
blocks=[
[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin],
[OSBlockINin, OSBlock]
],
layers=[2, 2, 2],
channels=[64, 256, 384, 512],
loss=loss,
conv1_IN=True,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_ain_x1_0')
return model
def osnet_ain_x0_75(
num_classes=1000, pretrained=True, loss='softmax', **kwargs
):
model = OSNet(
num_classes,
blocks=[
[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin],
[OSBlockINin, OSBlock]
],
layers=[2, 2, 2],
channels=[48, 192, 288, 384],
loss=loss,
conv1_IN=True,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_ain_x0_75')
return model
def osnet_ain_x0_5(
num_classes=1000, pretrained=True, loss='softmax', **kwargs
):
model = OSNet(
num_classes,
blocks=[
[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin],
[OSBlockINin, OSBlock]
],
layers=[2, 2, 2],
channels=[32, 128, 192, 256],
loss=loss,
conv1_IN=True,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_ain_x0_5')
return model
def osnet_ain_x0_25(
num_classes=1000, pretrained=True, loss='softmax', **kwargs
):
model = OSNet(
num_classes,
blocks=[
[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin],
[OSBlockINin, OSBlock]
],
layers=[2, 2, 2],
channels=[16, 64, 96, 128],
loss=loss,
conv1_IN=True,
**kwargs
)
if pretrained:
init_pretrained_weights(model, key='osnet_ain_x0_25')
return model
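A small sketch of the ChannelGate shared by both OSNet variants: it squeezes the input with global average pooling and produces per-channel sigmoid gates that rescale the input. The bare import path is an assumption.

# Illustrative check of ChannelGate behaviour (shapes only).
import torch
from osnet_ain import ChannelGate  # assumed import path

x = torch.randn(2, 64, 16, 8)
gates = ChannelGate(in_channels=64, return_gates=True)(x)
print(gates.shape)  # torch.Size([2, 64, 1, 1]); sigmoid values in (0, 1)
gated = ChannelGate(in_channels=64)(x)
print(gated.shape)  # same as x: the gates rescale each channel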

View file

@@ -0,0 +1,314 @@
from __future__ import division, absolute_import
import torch.utils.model_zoo as model_zoo
from torch import nn
from torch.nn import functional as F
__all__ = ['pcb_p6', 'pcb_p4']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False
)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(
planes,
planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False
)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(
planes, planes * self.expansion, kernel_size=1, bias=False
)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class DimReduceLayer(nn.Module):
def __init__(self, in_channels, out_channels, nonlinear):
super(DimReduceLayer, self).__init__()
layers = []
layers.append(
nn.Conv2d(
in_channels, out_channels, 1, stride=1, padding=0, bias=False
)
)
layers.append(nn.BatchNorm2d(out_channels))
if nonlinear == 'relu':
layers.append(nn.ReLU(inplace=True))
elif nonlinear == 'leakyrelu':
layers.append(nn.LeakyReLU(0.1))
self.layers = nn.Sequential(*layers)
def forward(self, x):
return self.layers(x)
class PCB(nn.Module):
"""Part-based Convolutional Baseline.
Reference:
Sun et al. Beyond Part Models: Person Retrieval with Refined
Part Pooling (and A Strong Convolutional Baseline). ECCV 2018.
Public keys:
- ``pcb_p4``: PCB with 4-part strips.
- ``pcb_p6``: PCB with 6-part strips.
"""
def __init__(
self,
num_classes,
loss,
block,
layers,
parts=6,
reduced_dim=256,
nonlinear='relu',
**kwargs
):
self.inplanes = 64
super(PCB, self).__init__()
self.loss = loss
self.parts = parts
self.feature_dim = 512 * block.expansion
# backbone network
self.conv1 = nn.Conv2d(
3, 64, kernel_size=7, stride=2, padding=3, bias=False
)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=1)
# pcb layers
self.parts_avgpool = nn.AdaptiveAvgPool2d((self.parts, 1))
self.dropout = nn.Dropout(p=0.5)
self.conv5 = DimReduceLayer(
512 * block.expansion, reduced_dim, nonlinear=nonlinear
)
self.feature_dim = reduced_dim
self.classifier = nn.ModuleList(
[
nn.Linear(self.feature_dim, num_classes)
for _ in range(self.parts)
]
)
self._init_params()
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(
self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias=False
),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def _init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def featuremaps(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
return x
def forward(self, x):
f = self.featuremaps(x)
v_g = self.parts_avgpool(f)
if not self.training:
v_g = F.normalize(v_g, p=2, dim=1)
return v_g.view(v_g.size(0), -1)
v_g = self.dropout(v_g)
v_h = self.conv5(v_g)
y = []
for i in range(self.parts):
v_h_i = v_h[:, :, i, :]
v_h_i = v_h_i.view(v_h_i.size(0), -1)
y_i = self.classifier[i](v_h_i)
y.append(y_i)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
v_g = F.normalize(v_g, p=2, dim=1)
return y, v_g.view(v_g.size(0), -1)
else:
raise KeyError('Unsupported loss: {}'.format(self.loss))
def init_pretrained_weights(model, model_url):
"""Initializes model with pretrained weights.
    Layers whose names or sizes do not match the pretrained checkpoint are left unchanged.
"""
pretrain_dict = model_zoo.load_url(model_url)
model_dict = model.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
model.load_state_dict(model_dict)
def pcb_p6(num_classes, loss='softmax', pretrained=True, **kwargs):
model = PCB(
num_classes=num_classes,
loss=loss,
block=Bottleneck,
layers=[3, 4, 6, 3],
last_stride=1,
parts=6,
reduced_dim=256,
nonlinear='relu',
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['resnet50'])
return model
def pcb_p4(num_classes, loss='softmax', pretrained=True, **kwargs):
model = PCB(
num_classes=num_classes,
loss=loss,
block=Bottleneck,
layers=[3, 4, 6, 3],
last_stride=1,
parts=4,
reduced_dim=256,
nonlinear='relu',
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['resnet50'])
return model
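A sketch of PCB's two output modes, as wired in forward above: per-part logits in training and L2-normalized stacked part features in eval. pretrained=False and the bare import path are assumptions to keep the example self-contained.

# Shapes only; PCB is conventionally trained on 384x128 crops, but the
# adaptive part pooling accepts other sizes too.
import torch
from pcb import pcb_p6  # assumed import path

model = pcb_p6(num_classes=751, pretrained=False)
model.train()
x = torch.randn(4, 3, 384, 128)
y = model(x)               # list of 6 logit tensors, one per stripe
print(len(y), y[0].shape)  # 6 torch.Size([4, 751])
model.eval()
with torch.no_grad():
    v = model(x)           # stacked normalized part features
print(v.shape)             # torch.Size([4, 12288]) == 4 x (2048 * 6)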

View file

@@ -0,0 +1,530 @@
"""
Code source: https://github.com/pytorch/vision
"""
from __future__ import division, absolute_import
import torch.utils.model_zoo as model_zoo
from torch import nn
__all__ = [
'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152',
'resnext50_32x4d', 'resnext101_32x8d', 'resnet50_fc512'
]
model_urls = {
'resnet18':
'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34':
'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50':
'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101':
'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152':
'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
'resnext50_32x4d':
'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
'resnext101_32x8d':
'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
}
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=dilation,
groups=groups,
bias=False,
dilation=dilation
)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(
in_planes, out_planes, kernel_size=1, stride=stride, bias=False
)
class BasicBlock(nn.Module):
expansion = 1
def __init__(
self,
inplanes,
planes,
stride=1,
downsample=None,
groups=1,
base_width=64,
dilation=1,
norm_layer=None
):
super(BasicBlock, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError(
'BasicBlock only supports groups=1 and base_width=64'
)
if dilation > 1:
raise NotImplementedError(
"Dilation > 1 not supported in BasicBlock"
)
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(
self,
inplanes,
planes,
stride=1,
downsample=None,
groups=1,
base_width=64,
dilation=1,
norm_layer=None
):
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width/64.)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
"""Residual network.
Reference:
- He et al. Deep Residual Learning for Image Recognition. CVPR 2016.
- Xie et al. Aggregated Residual Transformations for Deep Neural Networks. CVPR 2017.
Public keys:
- ``resnet18``: ResNet18.
- ``resnet34``: ResNet34.
- ``resnet50``: ResNet50.
- ``resnet101``: ResNet101.
- ``resnet152``: ResNet152.
- ``resnext50_32x4d``: ResNeXt50.
- ``resnext101_32x8d``: ResNeXt101.
- ``resnet50_fc512``: ResNet50 + FC.
"""
def __init__(
self,
num_classes,
loss,
block,
layers,
zero_init_residual=False,
groups=1,
width_per_group=64,
replace_stride_with_dilation=None,
norm_layer=None,
last_stride=2,
fc_dims=None,
dropout_p=None,
**kwargs
):
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.loss = loss
self.feature_dim = 512 * block.expansion
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError(
"replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".
format(replace_stride_with_dilation)
)
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(
3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False
)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(
block,
128,
layers[1],
stride=2,
dilate=replace_stride_with_dilation[0]
)
self.layer3 = self._make_layer(
block,
256,
layers[2],
stride=2,
dilate=replace_stride_with_dilation[1]
)
self.layer4 = self._make_layer(
block,
512,
layers[3],
stride=last_stride,
dilate=replace_stride_with_dilation[2]
)
self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = self._construct_fc_layer(
fc_dims, 512 * block.expansion, dropout_p
)
self.classifier = nn.Linear(self.feature_dim, num_classes)
self._init_params()
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(
block(
self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer
)
)
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(
block(
self.inplanes,
planes,
groups=self.groups,
base_width=self.base_width,
dilation=self.dilation,
norm_layer=norm_layer
)
)
return nn.Sequential(*layers)
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
"""Constructs fully connected layer
Args:
fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
input_dim (int): input dimension
dropout_p (float): dropout probability, if None, dropout is unused
"""
if fc_dims is None:
self.feature_dim = input_dim
return None
assert isinstance(
fc_dims, (list, tuple)
), 'fc_dims must be either list or tuple, but got {}'.format(
type(fc_dims)
)
layers = []
for dim in fc_dims:
layers.append(nn.Linear(input_dim, dim))
layers.append(nn.BatchNorm1d(dim))
layers.append(nn.ReLU(inplace=True))
if dropout_p is not None:
layers.append(nn.Dropout(p=dropout_p))
input_dim = dim
self.feature_dim = fc_dims[-1]
return nn.Sequential(*layers)
def _init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def featuremaps(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
return x
def forward(self, x):
f = self.featuremaps(x)
v = self.global_avgpool(f)
v = v.view(v.size(0), -1)
if self.fc is not None:
v = self.fc(v)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
def init_pretrained_weights(model, model_url):
"""Initializes model with pretrained weights.
    Layers whose names or sizes do not match the pretrained checkpoint are left unchanged.
"""
pretrain_dict = model_zoo.load_url(model_url)
model_dict = model.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
model.load_state_dict(model_dict)
"""ResNet"""
def resnet18(num_classes, loss='softmax', pretrained=True, **kwargs):
model = ResNet(
num_classes=num_classes,
loss=loss,
block=BasicBlock,
layers=[2, 2, 2, 2],
last_stride=2,
fc_dims=None,
dropout_p=None,
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['resnet18'])
return model
def resnet34(num_classes, loss='softmax', pretrained=True, **kwargs):
model = ResNet(
num_classes=num_classes,
loss=loss,
block=BasicBlock,
layers=[3, 4, 6, 3],
last_stride=2,
fc_dims=None,
dropout_p=None,
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['resnet34'])
return model
def resnet50(num_classes, loss='softmax', pretrained=True, **kwargs):
model = ResNet(
num_classes=num_classes,
loss=loss,
block=Bottleneck,
layers=[3, 4, 6, 3],
last_stride=2,
fc_dims=None,
dropout_p=None,
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['resnet50'])
return model
def resnet101(num_classes, loss='softmax', pretrained=True, **kwargs):
model = ResNet(
num_classes=num_classes,
loss=loss,
block=Bottleneck,
layers=[3, 4, 23, 3],
last_stride=2,
fc_dims=None,
dropout_p=None,
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['resnet101'])
return model
def resnet152(num_classes, loss='softmax', pretrained=True, **kwargs):
model = ResNet(
num_classes=num_classes,
loss=loss,
block=Bottleneck,
layers=[3, 8, 36, 3],
last_stride=2,
fc_dims=None,
dropout_p=None,
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['resnet152'])
return model
"""ResNeXt"""
def resnext50_32x4d(num_classes, loss='softmax', pretrained=True, **kwargs):
model = ResNet(
num_classes=num_classes,
loss=loss,
block=Bottleneck,
layers=[3, 4, 6, 3],
last_stride=2,
fc_dims=None,
dropout_p=None,
groups=32,
width_per_group=4,
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['resnext50_32x4d'])
return model
def resnext101_32x8d(num_classes, loss='softmax', pretrained=True, **kwargs):
model = ResNet(
num_classes=num_classes,
loss=loss,
block=Bottleneck,
layers=[3, 4, 23, 3],
last_stride=2,
fc_dims=None,
dropout_p=None,
groups=32,
width_per_group=8,
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['resnext101_32x8d'])
return model
"""
ResNet + FC
"""
def resnet50_fc512(num_classes, loss='softmax', pretrained=True, **kwargs):
model = ResNet(
num_classes=num_classes,
loss=loss,
block=Bottleneck,
layers=[3, 4, 6, 3],
last_stride=1,
fc_dims=[512],
dropout_p=None,
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['resnet50'])
return model
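A sketch contrasting two of the factory configurations above: resnet50 keeps last_stride=2 and returns raw pooled features, while resnet50_fc512 sets last_stride=1 and appends a 512-dim fc head. The bare import path is an assumption.

import torch
from resnet import resnet50, resnet50_fc512  # assumed import path

a = resnet50(num_classes=751, pretrained=False)
b = resnet50_fc512(num_classes=751, pretrained=False)
a.eval(); b.eval()
x = torch.randn(2, 3, 256, 128)
with torch.no_grad():
    print(a(x).shape)  # torch.Size([2, 2048]); pooled layer4 features
    print(b(x).shape)  # torch.Size([2, 512]);  output of the extra fc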

View file

@@ -0,0 +1,289 @@
"""
Credit to https://github.com/XingangPan/IBN-Net.
"""
from __future__ import division, absolute_import
import math
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
__all__ = ['resnet50_ibn_a']
model_urls = {
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False
)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class IBN(nn.Module):
def __init__(self, planes):
super(IBN, self).__init__()
half1 = int(planes / 2)
self.half = half1
half2 = planes - half1
self.IN = nn.InstanceNorm2d(half1, affine=True)
self.BN = nn.BatchNorm2d(half2)
def forward(self, x):
split = torch.split(x, self.half, 1)
out1 = self.IN(split[0].contiguous())
out2 = self.BN(split[1].contiguous())
out = torch.cat((out1, out2), 1)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, ibn=False, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
if ibn:
self.bn1 = IBN(planes)
else:
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(
planes,
planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False
)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(
planes, planes * self.expansion, kernel_size=1, bias=False
)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNet(nn.Module):
"""Residual network + IBN layer.
Reference:
- He et al. Deep Residual Learning for Image Recognition. CVPR 2016.
- Pan et al. Two at Once: Enhancing Learning and Generalization
Capacities via IBN-Net. ECCV 2018.
"""
def __init__(
self,
block,
layers,
num_classes=1000,
loss='softmax',
fc_dims=None,
dropout_p=None,
**kwargs
):
scale = 64
self.inplanes = scale
super(ResNet, self).__init__()
self.loss = loss
self.feature_dim = scale * 8 * block.expansion
self.conv1 = nn.Conv2d(
3, scale, kernel_size=7, stride=2, padding=3, bias=False
)
self.bn1 = nn.BatchNorm2d(scale)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, scale, layers[0])
self.layer2 = self._make_layer(block, scale * 2, layers[1], stride=2)
self.layer3 = self._make_layer(block, scale * 4, layers[2], stride=2)
self.layer4 = self._make_layer(block, scale * 8, layers[3], stride=2)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = self._construct_fc_layer(
fc_dims, scale * 8 * block.expansion, dropout_p
)
self.classifier = nn.Linear(self.feature_dim, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.InstanceNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(
self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias=False
),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
ibn = True
if planes == 512:
ibn = False
layers.append(block(self.inplanes, planes, ibn, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes, ibn))
return nn.Sequential(*layers)
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
"""Constructs fully connected layer
Args:
fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
input_dim (int): input dimension
dropout_p (float): dropout probability, if None, dropout is unused
"""
if fc_dims is None:
self.feature_dim = input_dim
return None
assert isinstance(
fc_dims, (list, tuple)
), 'fc_dims must be either list or tuple, but got {}'.format(
type(fc_dims)
)
layers = []
for dim in fc_dims:
layers.append(nn.Linear(input_dim, dim))
layers.append(nn.BatchNorm1d(dim))
layers.append(nn.ReLU(inplace=True))
if dropout_p is not None:
layers.append(nn.Dropout(p=dropout_p))
input_dim = dim
self.feature_dim = fc_dims[-1]
return nn.Sequential(*layers)
def featuremaps(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
return x
def forward(self, x):
f = self.featuremaps(x)
v = self.avgpool(f)
v = v.view(v.size(0), -1)
if self.fc is not None:
v = self.fc(v)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
def init_pretrained_weights(model, model_url):
"""Initializes model with pretrained weights.
    Layers whose names or sizes do not match the pretrained checkpoint are left unchanged.
"""
pretrain_dict = model_zoo.load_url(model_url)
model_dict = model.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
model.load_state_dict(model_dict)
def resnet50_ibn_a(num_classes, loss='softmax', pretrained=False, **kwargs):
model = ResNet(
Bottleneck, [3, 4, 6, 3], num_classes=num_classes, loss=loss, **kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['resnet50'])
return model
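A sketch of the IBN split defined above: the first half of the channels goes through InstanceNorm2d, the second half through BatchNorm2d, and the halves are concatenated back. The bare import path is an assumption.

import torch
from resnet_ibn_a import IBN  # assumed import path

ibn = IBN(planes=64)
ibn.eval()
x = torch.randn(2, 64, 16, 8)
out = ibn(x)
print(out.shape)  # torch.Size([2, 64, 16, 8]); channels 0-31 instance-
                  # normalized, channels 32-63 batch-normalized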

View file

@@ -0,0 +1,274 @@
"""
Credit to https://github.com/XingangPan/IBN-Net.
"""
from __future__ import division, absolute_import
import math
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
__all__ = ['resnet50_ibn_b']
model_urls = {
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False
)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, IN=False):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(
planes,
planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False
)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(
planes, planes * self.expansion, kernel_size=1, bias=False
)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
self.IN = None
if IN:
self.IN = nn.InstanceNorm2d(planes * 4, affine=True)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
if self.IN is not None:
out = self.IN(out)
out = self.relu(out)
return out
class ResNet(nn.Module):
"""Residual network + IBN layer.
Reference:
- He et al. Deep Residual Learning for Image Recognition. CVPR 2016.
- Pan et al. Two at Once: Enhancing Learning and Generalization
Capacities via IBN-Net. ECCV 2018.
"""
def __init__(
self,
block,
layers,
num_classes=1000,
loss='softmax',
fc_dims=None,
dropout_p=None,
**kwargs
):
scale = 64
self.inplanes = scale
super(ResNet, self).__init__()
self.loss = loss
self.feature_dim = scale * 8 * block.expansion
self.conv1 = nn.Conv2d(
3, scale, kernel_size=7, stride=2, padding=3, bias=False
)
self.bn1 = nn.InstanceNorm2d(scale, affine=True)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(
block, scale, layers[0], stride=1, IN=True
)
self.layer2 = self._make_layer(
block, scale * 2, layers[1], stride=2, IN=True
)
self.layer3 = self._make_layer(block, scale * 4, layers[2], stride=2)
self.layer4 = self._make_layer(block, scale * 8, layers[3], stride=2)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = self._construct_fc_layer(
fc_dims, scale * 8 * block.expansion, dropout_p
)
self.classifier = nn.Linear(self.feature_dim, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.InstanceNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self, block, planes, blocks, stride=1, IN=False):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(
self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias=False
),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks - 1):
layers.append(block(self.inplanes, planes))
layers.append(block(self.inplanes, planes, IN=IN))
return nn.Sequential(*layers)
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
"""Constructs fully connected layer
Args:
fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
input_dim (int): input dimension
dropout_p (float): dropout probability, if None, dropout is unused
"""
if fc_dims is None:
self.feature_dim = input_dim
return None
assert isinstance(
fc_dims, (list, tuple)
), 'fc_dims must be either list or tuple, but got {}'.format(
type(fc_dims)
)
layers = []
for dim in fc_dims:
layers.append(nn.Linear(input_dim, dim))
layers.append(nn.BatchNorm1d(dim))
layers.append(nn.ReLU(inplace=True))
if dropout_p is not None:
layers.append(nn.Dropout(p=dropout_p))
input_dim = dim
self.feature_dim = fc_dims[-1]
return nn.Sequential(*layers)
def featuremaps(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
return x
def forward(self, x):
f = self.featuremaps(x)
v = self.avgpool(f)
v = v.view(v.size(0), -1)
if self.fc is not None:
v = self.fc(v)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
def init_pretrained_weights(model, model_url):
"""Initializes model with pretrained weights.
Layers that don't match with pretrained layers in name or size are kept unchanged.
"""
pretrain_dict = model_zoo.load_url(model_url)
model_dict = model.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
model.load_state_dict(model_dict)
def resnet50_ibn_b(num_classes, loss='softmax', pretrained=False, **kwargs):
model = ResNet(
Bottleneck, [3, 4, 6, 3], num_classes=num_classes, loss=loss, **kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['resnet50'])
return model
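# Hedged usage sketch (added for illustration, not part of the original file;
# assumes `torch` is imported at the top of this module): in eval mode the
# network returns pooled 2048-d embeddings rather than class logits.
def _resnet50_ibn_b_demo():
    import torch
    model = resnet50_ibn_b(num_classes=751, pretrained=False)
    model.eval()
    with torch.no_grad():
        v = model(torch.randn(2, 3, 256, 128))  # person crops -> (2, 2048)
    return v.shape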

View file

@ -0,0 +1,307 @@
from __future__ import division, absolute_import
import torch
import torch.utils.model_zoo as model_zoo
from torch import nn
__all__ = ['resnet50mid']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False
)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(
planes,
planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False
)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(
planes, planes * self.expansion, kernel_size=1, bias=False
)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNetMid(nn.Module):
"""Residual network + mid-level features.
Reference:
Yu et al. The Devil is in the Middle: Exploiting Mid-level Representations for
Cross-Domain Instance Matching. arXiv:1711.08106.
Public keys:
- ``resnet50mid``: ResNet50 + mid-level feature fusion.
"""
def __init__(
self,
num_classes,
loss,
block,
layers,
last_stride=2,
fc_dims=None,
**kwargs
):
self.inplanes = 64
super(ResNetMid, self).__init__()
self.loss = loss
self.feature_dim = 512 * block.expansion
# backbone network
self.conv1 = nn.Conv2d(
3, 64, kernel_size=7, stride=2, padding=3, bias=False
)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(
block, 512, layers[3], stride=last_stride
)
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
assert fc_dims is not None
self.fc_fusion = self._construct_fc_layer(
fc_dims, 512 * block.expansion * 2
)
self.feature_dim += 512 * block.expansion
self.classifier = nn.Linear(self.feature_dim, num_classes)
self._init_params()
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(
self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias=False
),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
"""Constructs fully connected layer
Args:
fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
input_dim (int): input dimension
dropout_p (float): dropout probability, if None, dropout is unused
"""
if fc_dims is None:
self.feature_dim = input_dim
return None
assert isinstance(
fc_dims, (list, tuple)
), 'fc_dims must be either list or tuple, but got {}'.format(
type(fc_dims)
)
layers = []
for dim in fc_dims:
layers.append(nn.Linear(input_dim, dim))
layers.append(nn.BatchNorm1d(dim))
layers.append(nn.ReLU(inplace=True))
if dropout_p is not None:
layers.append(nn.Dropout(p=dropout_p))
input_dim = dim
self.feature_dim = fc_dims[-1]
return nn.Sequential(*layers)
def _init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def featuremaps(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x4a = self.layer4[0](x)
x4b = self.layer4[1](x4a)
x4c = self.layer4[2](x4b)
return x4a, x4b, x4c
def forward(self, x):
x4a, x4b, x4c = self.featuremaps(x)
v4a = self.global_avgpool(x4a)
v4b = self.global_avgpool(x4b)
v4c = self.global_avgpool(x4c)
v4ab = torch.cat([v4a, v4b], 1)
v4ab = v4ab.view(v4ab.size(0), -1)
v4ab = self.fc_fusion(v4ab)
v4c = v4c.view(v4c.size(0), -1)
v = torch.cat([v4ab, v4c], 1)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError('Unsupported loss: {}'.format(self.loss))
def init_pretrained_weights(model, model_url):
"""Initializes model with pretrained weights.
Layers that don't match with pretrained layers in name or size are kept unchanged.
"""
pretrain_dict = model_zoo.load_url(model_url)
model_dict = model.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
model.load_state_dict(model_dict)
"""
Residual network configurations:
--
resnet18: block=BasicBlock, layers=[2, 2, 2, 2]
resnet34: block=BasicBlock, layers=[3, 4, 6, 3]
resnet50: block=Bottleneck, layers=[3, 4, 6, 3]
resnet101: block=Bottleneck, layers=[3, 4, 23, 3]
resnet152: block=Bottleneck, layers=[3, 8, 36, 3]
"""
def resnet50mid(num_classes, loss='softmax', pretrained=True, **kwargs):
model = ResNetMid(
num_classes=num_classes,
loss=loss,
block=Bottleneck,
layers=[3, 4, 6, 3],
last_stride=2,
fc_dims=[1024],
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['resnet50'])
return model
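# Hedged usage sketch (illustrative, not in the original file): the fused
# descriptor concatenates the 1024-d fc_fusion output (from blocks 4a and 4b)
# with the 2048-d final-block features, so eval mode yields 3072-d vectors.
def _resnet50mid_demo():
    model = resnet50mid(num_classes=751, pretrained=False)
    model.eval()
    with torch.no_grad():
        v = model(torch.randn(2, 3, 256, 128))  # -> (2, 3072)
    return v.shape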

View file

@ -0,0 +1,688 @@
from __future__ import division, absolute_import
import math
from collections import OrderedDict
import torch.nn as nn
from torch.utils import model_zoo
__all__ = [
'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152',
'se_resnext50_32x4d', 'se_resnext101_32x4d', 'se_resnet50_fc512'
]
"""
Code imported from https://github.com/Cadene/pretrained-models.pytorch
"""
pretrained_settings = {
'senet154': {
'imagenet': {
'url':
'http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth',
'input_space': 'RGB',
'input_size': [3, 224, 224],
'input_range': [0, 1],
'mean': [0.485, 0.456, 0.406],
'std': [0.229, 0.224, 0.225],
'num_classes': 1000
}
},
'se_resnet50': {
'imagenet': {
'url':
'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth',
'input_space': 'RGB',
'input_size': [3, 224, 224],
'input_range': [0, 1],
'mean': [0.485, 0.456, 0.406],
'std': [0.229, 0.224, 0.225],
'num_classes': 1000
}
},
'se_resnet101': {
'imagenet': {
'url':
'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth',
'input_space': 'RGB',
'input_size': [3, 224, 224],
'input_range': [0, 1],
'mean': [0.485, 0.456, 0.406],
'std': [0.229, 0.224, 0.225],
'num_classes': 1000
}
},
'se_resnet152': {
'imagenet': {
'url':
'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth',
'input_space': 'RGB',
'input_size': [3, 224, 224],
'input_range': [0, 1],
'mean': [0.485, 0.456, 0.406],
'std': [0.229, 0.224, 0.225],
'num_classes': 1000
}
},
'se_resnext50_32x4d': {
'imagenet': {
'url':
'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth',
'input_space': 'RGB',
'input_size': [3, 224, 224],
'input_range': [0, 1],
'mean': [0.485, 0.456, 0.406],
'std': [0.229, 0.224, 0.225],
'num_classes': 1000
}
},
'se_resnext101_32x4d': {
'imagenet': {
'url':
'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth',
'input_space': 'RGB',
'input_size': [3, 224, 224],
'input_range': [0, 1],
'mean': [0.485, 0.456, 0.406],
'std': [0.229, 0.224, 0.225],
'num_classes': 1000
}
},
}
class SEModule(nn.Module):
def __init__(self, channels, reduction):
super(SEModule, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc1 = nn.Conv2d(
channels, channels // reduction, kernel_size=1, padding=0
)
self.relu = nn.ReLU(inplace=True)
self.fc2 = nn.Conv2d(
channels // reduction, channels, kernel_size=1, padding=0
)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
module_input = x
x = self.avg_pool(x)
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.sigmoid(x)
return module_input * x
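# Hedged sanity check (added for illustration; `torch` is imported locally
# because this file only imports `torch.nn`): SEModule rescales each input
# channel by a sigmoid gate in (0, 1), so the output shape matches the input.
def _se_module_demo():
    import torch
    se = SEModule(channels=64, reduction=16)
    y = se(torch.randn(1, 64, 8, 8))
    assert y.shape == (1, 64, 8, 8)  # per-channel reweighting keeps the shape
    return y.shape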
class Bottleneck(nn.Module):
"""
Base class for bottlenecks that implements `forward()` method.
"""
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out = self.se_module(out) + residual
out = self.relu(out)
return out
class SEBottleneck(Bottleneck):
"""
Bottleneck for SENet154.
"""
expansion = 4
def __init__(
self, inplanes, planes, groups, reduction, stride=1, downsample=None
):
super(SEBottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes * 2)
self.conv2 = nn.Conv2d(
planes * 2,
planes * 4,
kernel_size=3,
stride=stride,
padding=1,
groups=groups,
bias=False
)
self.bn2 = nn.BatchNorm2d(planes * 4)
self.conv3 = nn.Conv2d(
planes * 4, planes * 4, kernel_size=1, bias=False
)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.se_module = SEModule(planes * 4, reduction=reduction)
self.downsample = downsample
self.stride = stride
class SEResNetBottleneck(Bottleneck):
"""
ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe
implementation and uses `stride=stride` in `conv1` and not in `conv2`
(the latter is used in the torchvision implementation of ResNet).
"""
expansion = 4
def __init__(
self, inplanes, planes, groups, reduction, stride=1, downsample=None
):
super(SEResNetBottleneck, self).__init__()
self.conv1 = nn.Conv2d(
inplanes, planes, kernel_size=1, bias=False, stride=stride
)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(
planes,
planes,
kernel_size=3,
padding=1,
groups=groups,
bias=False
)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.se_module = SEModule(planes * 4, reduction=reduction)
self.downsample = downsample
self.stride = stride
class SEResNeXtBottleneck(Bottleneck):
"""ResNeXt bottleneck type C with a Squeeze-and-Excitation module"""
expansion = 4
def __init__(
self,
inplanes,
planes,
groups,
reduction,
stride=1,
downsample=None,
base_width=4
):
super(SEResNeXtBottleneck, self).__init__()
width = int(math.floor(planes * (base_width/64.)) * groups)
self.conv1 = nn.Conv2d(
inplanes, width, kernel_size=1, bias=False, stride=1
)
self.bn1 = nn.BatchNorm2d(width)
self.conv2 = nn.Conv2d(
width,
width,
kernel_size=3,
stride=stride,
padding=1,
groups=groups,
bias=False
)
self.bn2 = nn.BatchNorm2d(width)
self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.se_module = SEModule(planes * 4, reduction=reduction)
self.downsample = downsample
self.stride = stride
class SENet(nn.Module):
"""Squeeze-and-excitation network.
Reference:
Hu et al. Squeeze-and-Excitation Networks. CVPR 2018.
Public keys:
- ``senet154``: SENet154.
- ``se_resnet50``: ResNet50 + SE.
- ``se_resnet101``: ResNet101 + SE.
- ``se_resnet152``: ResNet152 + SE.
- ``se_resnext50_32x4d``: ResNeXt50 (groups=32, width=4) + SE.
- ``se_resnext101_32x4d``: ResNeXt101 (groups=32, width=4) + SE.
- ``se_resnet50_fc512``: (ResNet50 + SE) + FC.
"""
def __init__(
self,
num_classes,
loss,
block,
layers,
groups,
reduction,
dropout_p=0.2,
inplanes=128,
input_3x3=True,
downsample_kernel_size=3,
downsample_padding=1,
last_stride=2,
fc_dims=None,
**kwargs
):
"""
Parameters
----------
block (nn.Module): Bottleneck class.
- For SENet154: SEBottleneck
- For SE-ResNet models: SEResNetBottleneck
- For SE-ResNeXt models: SEResNeXtBottleneck
layers (list of ints): Number of residual blocks for 4 layers of the
network (layer1...layer4).
groups (int): Number of groups for the 3x3 convolution in each
bottleneck block.
- For SENet154: 64
- For SE-ResNet models: 1
- For SE-ResNeXt models: 32
reduction (int): Reduction ratio for Squeeze-and-Excitation modules.
- For all models: 16
dropout_p (float or None): Drop probability for the Dropout layer.
If `None` the Dropout layer is not used.
- For SENet154: 0.2
- For SE-ResNet models: None
- For SE-ResNeXt models: None
inplanes (int): Number of input channels for layer1.
- For SENet154: 128
- For SE-ResNet models: 64
- For SE-ResNeXt models: 64
input_3x3 (bool): If `True`, use three 3x3 convolutions instead of
a single 7x7 convolution in layer0.
- For SENet154: True
- For SE-ResNet models: False
- For SE-ResNeXt models: False
downsample_kernel_size (int): Kernel size for downsampling convolutions
in layer2, layer3 and layer4.
- For SENet154: 3
- For SE-ResNet models: 1
- For SE-ResNeXt models: 1
downsample_padding (int): Padding for downsampling convolutions in
layer2, layer3 and layer4.
- For SENet154: 1
- For SE-ResNet models: 0
- For SE-ResNeXt models: 0
num_classes (int): Number of outputs in `classifier` layer.
"""
super(SENet, self).__init__()
self.inplanes = inplanes
self.loss = loss
if input_3x3:
layer0_modules = [
(
'conv1',
nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False)
),
('bn1', nn.BatchNorm2d(64)),
('relu1', nn.ReLU(inplace=True)),
(
'conv2',
nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False)
),
('bn2', nn.BatchNorm2d(64)),
('relu2', nn.ReLU(inplace=True)),
(
'conv3',
nn.Conv2d(
64, inplanes, 3, stride=1, padding=1, bias=False
)
),
('bn3', nn.BatchNorm2d(inplanes)),
('relu3', nn.ReLU(inplace=True)),
]
else:
layer0_modules = [
(
'conv1',
nn.Conv2d(
3,
inplanes,
kernel_size=7,
stride=2,
padding=3,
bias=False
)
),
('bn1', nn.BatchNorm2d(inplanes)),
('relu1', nn.ReLU(inplace=True)),
]
# To preserve compatibility with Caffe weights `ceil_mode=True`
# is used instead of `padding=1`.
layer0_modules.append(
('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True))
)
self.layer0 = nn.Sequential(OrderedDict(layer0_modules))
self.layer1 = self._make_layer(
block,
planes=64,
blocks=layers[0],
groups=groups,
reduction=reduction,
downsample_kernel_size=1,
downsample_padding=0
)
self.layer2 = self._make_layer(
block,
planes=128,
blocks=layers[1],
stride=2,
groups=groups,
reduction=reduction,
downsample_kernel_size=downsample_kernel_size,
downsample_padding=downsample_padding
)
self.layer3 = self._make_layer(
block,
planes=256,
blocks=layers[2],
stride=2,
groups=groups,
reduction=reduction,
downsample_kernel_size=downsample_kernel_size,
downsample_padding=downsample_padding
)
self.layer4 = self._make_layer(
block,
planes=512,
blocks=layers[3],
stride=last_stride,
groups=groups,
reduction=reduction,
downsample_kernel_size=downsample_kernel_size,
downsample_padding=downsample_padding
)
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.fc = self._construct_fc_layer(
fc_dims, 512 * block.expansion, dropout_p
)
self.classifier = nn.Linear(self.feature_dim, num_classes)
def _make_layer(
self,
block,
planes,
blocks,
groups,
reduction,
stride=1,
downsample_kernel_size=1,
downsample_padding=0
):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(
self.inplanes,
planes * block.expansion,
kernel_size=downsample_kernel_size,
stride=stride,
padding=downsample_padding,
bias=False
),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(
block(
self.inplanes, planes, groups, reduction, stride, downsample
)
)
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes, groups, reduction))
return nn.Sequential(*layers)
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
"""
Construct fully connected layer
- fc_dims (list or tuple): dimensions of fc layers, if None,
no fc layers are constructed
- input_dim (int): input dimension
- dropout_p (float): dropout probability, if None, dropout is unused
"""
if fc_dims is None:
self.feature_dim = input_dim
return None
assert isinstance(
fc_dims, (list, tuple)
), 'fc_dims must be either list or tuple, but got {}'.format(
type(fc_dims)
)
layers = []
for dim in fc_dims:
layers.append(nn.Linear(input_dim, dim))
layers.append(nn.BatchNorm1d(dim))
layers.append(nn.ReLU(inplace=True))
if dropout_p is not None:
layers.append(nn.Dropout(p=dropout_p))
input_dim = dim
self.feature_dim = fc_dims[-1]
return nn.Sequential(*layers)
def featuremaps(self, x):
x = self.layer0(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
return x
def forward(self, x):
f = self.featuremaps(x)
v = self.global_avgpool(f)
v = v.view(v.size(0), -1)
if self.fc is not None:
v = self.fc(v)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
def init_pretrained_weights(model, model_url):
"""Initializes model with pretrained weights.
Layers that don't match with pretrained layers in name or size are kept unchanged.
"""
pretrain_dict = model_zoo.load_url(model_url)
model_dict = model.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
model.load_state_dict(model_dict)
def senet154(num_classes, loss='softmax', pretrained=True, **kwargs):
model = SENet(
num_classes=num_classes,
loss=loss,
block=SEBottleneck,
layers=[3, 8, 36, 3],
groups=64,
reduction=16,
dropout_p=0.2,
last_stride=2,
fc_dims=None,
**kwargs
)
if pretrained:
model_url = pretrained_settings['senet154']['imagenet']['url']
init_pretrained_weights(model, model_url)
return model
def se_resnet50(num_classes, loss='softmax', pretrained=True, **kwargs):
model = SENet(
num_classes=num_classes,
loss=loss,
block=SEResNetBottleneck,
layers=[3, 4, 6, 3],
groups=1,
reduction=16,
dropout_p=None,
inplanes=64,
input_3x3=False,
downsample_kernel_size=1,
downsample_padding=0,
last_stride=2,
fc_dims=None,
**kwargs
)
if pretrained:
model_url = pretrained_settings['se_resnet50']['imagenet']['url']
init_pretrained_weights(model, model_url)
return model
def se_resnet50_fc512(num_classes, loss='softmax', pretrained=True, **kwargs):
model = SENet(
num_classes=num_classes,
loss=loss,
block=SEResNetBottleneck,
layers=[3, 4, 6, 3],
groups=1,
reduction=16,
dropout_p=None,
inplanes=64,
input_3x3=False,
downsample_kernel_size=1,
downsample_padding=0,
last_stride=1,
fc_dims=[512],
**kwargs
)
if pretrained:
model_url = pretrained_settings['se_resnet50']['imagenet']['url']
init_pretrained_weights(model, model_url)
return model
def se_resnet101(num_classes, loss='softmax', pretrained=True, **kwargs):
model = SENet(
num_classes=num_classes,
loss=loss,
block=SEResNetBottleneck,
layers=[3, 4, 23, 3],
groups=1,
reduction=16,
dropout_p=None,
inplanes=64,
input_3x3=False,
downsample_kernel_size=1,
downsample_padding=0,
last_stride=2,
fc_dims=None,
**kwargs
)
if pretrained:
model_url = pretrained_settings['se_resnet101']['imagenet']['url']
init_pretrained_weights(model, model_url)
return model
def se_resnet152(num_classes, loss='softmax', pretrained=True, **kwargs):
model = SENet(
num_classes=num_classes,
loss=loss,
block=SEResNetBottleneck,
layers=[3, 8, 36, 3],
groups=1,
reduction=16,
dropout_p=None,
inplanes=64,
input_3x3=False,
downsample_kernel_size=1,
downsample_padding=0,
last_stride=2,
fc_dims=None,
**kwargs
)
if pretrained:
model_url = pretrained_settings['se_resnet152']['imagenet']['url']
init_pretrained_weights(model, model_url)
return model
def se_resnext50_32x4d(num_classes, loss='softmax', pretrained=True, **kwargs):
model = SENet(
num_classes=num_classes,
loss=loss,
block=SEResNeXtBottleneck,
layers=[3, 4, 6, 3],
groups=32,
reduction=16,
dropout_p=None,
inplanes=64,
input_3x3=False,
downsample_kernel_size=1,
downsample_padding=0,
last_stride=2,
fc_dims=None,
**kwargs
)
if pretrained:
        model_url = pretrained_settings['se_resnext50_32x4d']['imagenet']['url']
        init_pretrained_weights(model, model_url)
return model
def se_resnext101_32x4d(
num_classes, loss='softmax', pretrained=True, **kwargs
):
model = SENet(
num_classes=num_classes,
loss=loss,
block=SEResNeXtBottleneck,
layers=[3, 4, 23, 3],
groups=32,
reduction=16,
dropout_p=None,
inplanes=64,
input_3x3=False,
downsample_kernel_size=1,
downsample_padding=0,
last_stride=2,
fc_dims=None,
**kwargs
)
if pretrained:
        model_url = pretrained_settings['se_resnext101_32x4d']['imagenet']['url']
        init_pretrained_weights(model, model_url)
return model
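# Hedged usage sketch (not part of the original file): all factories above
# share the same interface; with fc_dims=None, eval mode returns 2048-d
# pooled embeddings (512 * Bottleneck.expansion).
def _senet_demo():
    import torch
    model = se_resnet50(num_classes=751, pretrained=False)
    model.eval()
    with torch.no_grad():
        v = model(torch.randn(2, 3, 256, 128))  # -> (2, 2048)
    return v.shape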

View file

@ -0,0 +1,198 @@
from __future__ import division, absolute_import
import torch
import torch.utils.model_zoo as model_zoo
from torch import nn
from torch.nn import functional as F
__all__ = ['shufflenet']
model_urls = {
# training epoch = 90, top1 = 61.8
'imagenet':
'https://mega.nz/#!RDpUlQCY!tr_5xBEkelzDjveIYBBcGcovNCOrgfiJO9kiidz9fZM',
}
class ChannelShuffle(nn.Module):
def __init__(self, num_groups):
super(ChannelShuffle, self).__init__()
self.g = num_groups
def forward(self, x):
b, c, h, w = x.size()
n = c // self.g
# reshape
x = x.view(b, self.g, n, h, w)
# transpose
x = x.permute(0, 2, 1, 3, 4).contiguous()
# flatten
x = x.view(b, c, h, w)
return x
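# Hedged sanity check (added for illustration): the shuffle interleaves
# channels across groups, e.g. [0,1,2,3,4,5] -> [0,3,1,4,2,5] for two groups,
# which is what lets successive grouped convolutions exchange information.
def _channel_shuffle_demo():
    x = torch.arange(6, dtype=torch.float32).view(1, 6, 1, 1)
    y = ChannelShuffle(num_groups=2)(x)
    assert y.view(-1).tolist() == [0., 3., 1., 4., 2., 5.]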
class Bottleneck(nn.Module):
def __init__(
self,
in_channels,
out_channels,
stride,
num_groups,
group_conv1x1=True
):
super(Bottleneck, self).__init__()
assert stride in [1, 2], 'Warning: stride must be either 1 or 2'
self.stride = stride
mid_channels = out_channels // 4
if stride == 2:
out_channels -= in_channels
# group conv is not applied to first conv1x1 at stage 2
num_groups_conv1x1 = num_groups if group_conv1x1 else 1
self.conv1 = nn.Conv2d(
in_channels,
mid_channels,
1,
groups=num_groups_conv1x1,
bias=False
)
self.bn1 = nn.BatchNorm2d(mid_channels)
self.shuffle1 = ChannelShuffle(num_groups)
self.conv2 = nn.Conv2d(
mid_channels,
mid_channels,
3,
stride=stride,
padding=1,
groups=mid_channels,
bias=False
)
self.bn2 = nn.BatchNorm2d(mid_channels)
self.conv3 = nn.Conv2d(
mid_channels, out_channels, 1, groups=num_groups, bias=False
)
self.bn3 = nn.BatchNorm2d(out_channels)
if stride == 2:
self.shortcut = nn.AvgPool2d(3, stride=2, padding=1)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.shuffle1(out)
out = self.bn2(self.conv2(out))
out = self.bn3(self.conv3(out))
if self.stride == 2:
res = self.shortcut(x)
out = F.relu(torch.cat([res, out], 1))
else:
out = F.relu(x + out)
return out
# configuration of (num_groups: #out_channels) based on Table 1 in the paper
cfg = {
1: [144, 288, 576],
2: [200, 400, 800],
3: [240, 480, 960],
4: [272, 544, 1088],
8: [384, 768, 1536],
}
class ShuffleNet(nn.Module):
"""ShuffleNet.
Reference:
Zhang et al. ShuffleNet: An Extremely Efficient Convolutional Neural
Network for Mobile Devices. CVPR 2018.
Public keys:
- ``shufflenet``: ShuffleNet (groups=3).
"""
def __init__(self, num_classes, loss='softmax', num_groups=3, **kwargs):
super(ShuffleNet, self).__init__()
self.loss = loss
self.conv1 = nn.Sequential(
nn.Conv2d(3, 24, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(24),
nn.ReLU(),
nn.MaxPool2d(3, stride=2, padding=1),
)
self.stage2 = nn.Sequential(
Bottleneck(
24, cfg[num_groups][0], 2, num_groups, group_conv1x1=False
),
Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups),
Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups),
Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups),
)
self.stage3 = nn.Sequential(
Bottleneck(cfg[num_groups][0], cfg[num_groups][1], 2, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),
)
self.stage4 = nn.Sequential(
Bottleneck(cfg[num_groups][1], cfg[num_groups][2], 2, num_groups),
Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups),
Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups),
Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups),
)
self.classifier = nn.Linear(cfg[num_groups][2], num_classes)
self.feat_dim = cfg[num_groups][2]
def forward(self, x):
x = self.conv1(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), -1)
if not self.training:
return x
y = self.classifier(x)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, x
else:
raise KeyError('Unsupported loss: {}'.format(self.loss))
def init_pretrained_weights(model, model_url):
"""Initializes model with pretrained weights.
Layers that don't match with pretrained layers in name or size are kept unchanged.
"""
pretrain_dict = model_zoo.load_url(model_url)
model_dict = model.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
model.load_state_dict(model_dict)
def shufflenet(num_classes, loss='softmax', pretrained=True, **kwargs):
model = ShuffleNet(num_classes, loss, **kwargs)
if pretrained:
# init_pretrained_weights(model, model_urls['imagenet'])
import warnings
warnings.warn(
'The imagenet pretrained weights need to be manually downloaded from {}'
.format(model_urls['imagenet'])
)
return model
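# Hedged usage sketch (not in the original file; the checkpoint filename is
# hypothetical): since the ImageNet weights must be fetched manually from the
# mega.nz link above, a typical flow builds the model unpretrained and loads
# a locally downloaded checkpoint leniently.
def _shufflenet_demo():
    model = shufflenet(num_classes=751, pretrained=False)
    # state = torch.load('shufflenet_imagenet.pth')   # downloaded by hand
    # model.load_state_dict(state, strict=False)      # lenient local load
    model.eval()
    with torch.no_grad():
        v = model(torch.randn(2, 3, 256, 128))  # -> (2, 960) for groups=3
    return v.shape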

View file

@ -0,0 +1,262 @@
"""
Code source: https://github.com/pytorch/vision
"""
from __future__ import division, absolute_import
import torch
import torch.utils.model_zoo as model_zoo
from torch import nn
__all__ = [
'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5',
'shufflenet_v2_x2_0'
]
model_urls = {
'shufflenetv2_x0.5':
'https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth',
'shufflenetv2_x1.0':
'https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth',
'shufflenetv2_x1.5': None,
'shufflenetv2_x2.0': None,
}
def channel_shuffle(x, groups):
batchsize, num_channels, height, width = x.data.size()
channels_per_group = num_channels // groups
# reshape
x = x.view(batchsize, groups, channels_per_group, height, width)
x = torch.transpose(x, 1, 2).contiguous()
# flatten
x = x.view(batchsize, -1, height, width)
return x
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride):
super(InvertedResidual, self).__init__()
if not (1 <= stride <= 3):
raise ValueError('illegal stride value')
self.stride = stride
branch_features = oup // 2
assert (self.stride != 1) or (inp == branch_features << 1)
if self.stride > 1:
self.branch1 = nn.Sequential(
self.depthwise_conv(
inp, inp, kernel_size=3, stride=self.stride, padding=1
),
nn.BatchNorm2d(inp),
nn.Conv2d(
inp,
branch_features,
kernel_size=1,
stride=1,
padding=0,
bias=False
),
nn.BatchNorm2d(branch_features),
nn.ReLU(inplace=True),
)
self.branch2 = nn.Sequential(
nn.Conv2d(
inp if (self.stride > 1) else branch_features,
branch_features,
kernel_size=1,
stride=1,
padding=0,
bias=False
),
nn.BatchNorm2d(branch_features),
nn.ReLU(inplace=True),
self.depthwise_conv(
branch_features,
branch_features,
kernel_size=3,
stride=self.stride,
padding=1
),
nn.BatchNorm2d(branch_features),
nn.Conv2d(
branch_features,
branch_features,
kernel_size=1,
stride=1,
padding=0,
bias=False
),
nn.BatchNorm2d(branch_features),
nn.ReLU(inplace=True),
)
@staticmethod
def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
return nn.Conv2d(
i, o, kernel_size, stride, padding, bias=bias, groups=i
)
def forward(self, x):
if self.stride == 1:
x1, x2 = x.chunk(2, dim=1)
out = torch.cat((x1, self.branch2(x2)), dim=1)
else:
out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
out = channel_shuffle(out, 2)
return out
class ShuffleNetV2(nn.Module):
"""ShuffleNetV2.
Reference:
Ma et al. ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design. ECCV 2018.
Public keys:
- ``shufflenet_v2_x0_5``: ShuffleNetV2 x0.5.
- ``shufflenet_v2_x1_0``: ShuffleNetV2 x1.0.
- ``shufflenet_v2_x1_5``: ShuffleNetV2 x1.5.
- ``shufflenet_v2_x2_0``: ShuffleNetV2 x2.0.
"""
def __init__(
self, num_classes, loss, stages_repeats, stages_out_channels, **kwargs
):
super(ShuffleNetV2, self).__init__()
self.loss = loss
if len(stages_repeats) != 3:
raise ValueError(
'expected stages_repeats as list of 3 positive ints'
)
if len(stages_out_channels) != 5:
raise ValueError(
'expected stages_out_channels as list of 5 positive ints'
)
self._stage_out_channels = stages_out_channels
input_channels = 3
output_channels = self._stage_out_channels[0]
self.conv1 = nn.Sequential(
nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False),
nn.BatchNorm2d(output_channels),
nn.ReLU(inplace=True),
)
input_channels = output_channels
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
stage_names = ['stage{}'.format(i) for i in [2, 3, 4]]
for name, repeats, output_channels in zip(
stage_names, stages_repeats, self._stage_out_channels[1:]
):
seq = [InvertedResidual(input_channels, output_channels, 2)]
for i in range(repeats - 1):
seq.append(
InvertedResidual(output_channels, output_channels, 1)
)
setattr(self, name, nn.Sequential(*seq))
input_channels = output_channels
output_channels = self._stage_out_channels[-1]
self.conv5 = nn.Sequential(
nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False),
nn.BatchNorm2d(output_channels),
nn.ReLU(inplace=True),
)
self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.classifier = nn.Linear(output_channels, num_classes)
def featuremaps(self, x):
x = self.conv1(x)
x = self.maxpool(x)
x = self.stage2(x)
x = self.stage3(x)
x = self.stage4(x)
x = self.conv5(x)
return x
def forward(self, x):
f = self.featuremaps(x)
v = self.global_avgpool(f)
v = v.view(v.size(0), -1)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError("Unsupported loss: {}".format(self.loss))
def init_pretrained_weights(model, model_url):
"""Initializes model with pretrained weights.
Layers that don't match with pretrained layers in name or size are kept unchanged.
"""
if model_url is None:
import warnings
warnings.warn(
'ImageNet pretrained weights are unavailable for this model'
)
return
pretrain_dict = model_zoo.load_url(model_url)
model_dict = model.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
model.load_state_dict(model_dict)
def shufflenet_v2_x0_5(num_classes, loss='softmax', pretrained=True, **kwargs):
model = ShuffleNetV2(
num_classes, loss, [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['shufflenetv2_x0.5'])
return model
def shufflenet_v2_x1_0(num_classes, loss='softmax', pretrained=True, **kwargs):
model = ShuffleNetV2(
num_classes, loss, [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['shufflenetv2_x1.0'])
return model
def shufflenet_v2_x1_5(num_classes, loss='softmax', pretrained=True, **kwargs):
model = ShuffleNetV2(
num_classes, loss, [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['shufflenetv2_x1.5'])
return model
def shufflenet_v2_x2_0(num_classes, loss='softmax', pretrained=True, **kwargs):
model = ShuffleNetV2(
num_classes, loss, [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['shufflenetv2_x2.0'])
return model
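# Hedged usage sketch (illustrative, not in the original file): the four
# factories differ only in their stage channel plans; x1.5 and x2.0 have no
# ImageNet URL, so pretrained=True only triggers the warning emitted by
# init_pretrained_weights.
def _shufflenet_v2_demo():
    model = shufflenet_v2_x1_0(num_classes=751, loss='softmax', pretrained=False)
    model.eval()
    with torch.no_grad():
        v = model(torch.randn(2, 3, 256, 128))  # -> (2, 1024) from conv5
    return v.shape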

View file

@ -0,0 +1,236 @@
"""
Code source: https://github.com/pytorch/vision
"""
from __future__ import division, absolute_import
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
__all__ = ['squeezenet1_0', 'squeezenet1_1', 'squeezenet1_0_fc512']
model_urls = {
'squeezenet1_0':
'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth',
'squeezenet1_1':
'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth',
}
class Fire(nn.Module):
def __init__(
self, inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes
):
super(Fire, self).__init__()
self.inplanes = inplanes
self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
self.squeeze_activation = nn.ReLU(inplace=True)
self.expand1x1 = nn.Conv2d(
squeeze_planes, expand1x1_planes, kernel_size=1
)
self.expand1x1_activation = nn.ReLU(inplace=True)
self.expand3x3 = nn.Conv2d(
squeeze_planes, expand3x3_planes, kernel_size=3, padding=1
)
self.expand3x3_activation = nn.ReLU(inplace=True)
def forward(self, x):
x = self.squeeze_activation(self.squeeze(x))
return torch.cat(
[
self.expand1x1_activation(self.expand1x1(x)),
self.expand3x3_activation(self.expand3x3(x))
], 1
)
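# Hedged sanity check (added for illustration): a Fire module's output width
# is expand1x1_planes + expand3x3_planes, independent of the squeeze width.
def _fire_demo():
    fire = Fire(inplanes=96, squeeze_planes=16,
                expand1x1_planes=64, expand3x3_planes=64)
    y = fire(torch.randn(1, 96, 32, 32))
    assert y.shape == (1, 128, 32, 32)  # 64 + 64 expanded channels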
class SqueezeNet(nn.Module):
"""SqueezeNet.
Reference:
    Iandola et al. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters
    and <0.5MB model size. arXiv:1602.07360.
Public keys:
- ``squeezenet1_0``: SqueezeNet (version=1.0).
- ``squeezenet1_1``: SqueezeNet (version=1.1).
- ``squeezenet1_0_fc512``: SqueezeNet (version=1.0) + FC.
"""
def __init__(
self,
num_classes,
loss,
version=1.0,
fc_dims=None,
dropout_p=None,
**kwargs
):
super(SqueezeNet, self).__init__()
self.loss = loss
self.feature_dim = 512
if version not in [1.0, 1.1]:
            raise ValueError(
                'Unsupported SqueezeNet version {version}: '
                '1.0 or 1.1 expected'.format(version=version)
            )
if version == 1.0:
self.features = nn.Sequential(
nn.Conv2d(3, 96, kernel_size=7, stride=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(96, 16, 64, 64),
Fire(128, 16, 64, 64),
Fire(128, 32, 128, 128),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(256, 32, 128, 128),
Fire(256, 48, 192, 192),
Fire(384, 48, 192, 192),
Fire(384, 64, 256, 256),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(512, 64, 256, 256),
)
else:
self.features = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=3, stride=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(64, 16, 64, 64),
Fire(128, 16, 64, 64),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(128, 32, 128, 128),
Fire(256, 32, 128, 128),
nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
Fire(256, 48, 192, 192),
Fire(384, 48, 192, 192),
Fire(384, 64, 256, 256),
Fire(512, 64, 256, 256),
)
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.fc = self._construct_fc_layer(fc_dims, 512, dropout_p)
self.classifier = nn.Linear(self.feature_dim, num_classes)
self._init_params()
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
"""Constructs fully connected layer
Args:
fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
input_dim (int): input dimension
dropout_p (float): dropout probability, if None, dropout is unused
"""
if fc_dims is None:
self.feature_dim = input_dim
return None
assert isinstance(
fc_dims, (list, tuple)
), 'fc_dims must be either list or tuple, but got {}'.format(
type(fc_dims)
)
layers = []
for dim in fc_dims:
layers.append(nn.Linear(input_dim, dim))
layers.append(nn.BatchNorm1d(dim))
layers.append(nn.ReLU(inplace=True))
if dropout_p is not None:
layers.append(nn.Dropout(p=dropout_p))
input_dim = dim
self.feature_dim = fc_dims[-1]
return nn.Sequential(*layers)
def _init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def forward(self, x):
f = self.features(x)
v = self.global_avgpool(f)
v = v.view(v.size(0), -1)
if self.fc is not None:
v = self.fc(v)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError('Unsupported loss: {}'.format(self.loss))
def init_pretrained_weights(model, model_url):
"""Initializes model with pretrained weights.
Layers that don't match with pretrained layers in name or size are kept unchanged.
"""
pretrain_dict = model_zoo.load_url(model_url, map_location=None)
model_dict = model.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
model.load_state_dict(model_dict)
def squeezenet1_0(num_classes, loss='softmax', pretrained=True, **kwargs):
model = SqueezeNet(
num_classes, loss, version=1.0, fc_dims=None, dropout_p=None, **kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['squeezenet1_0'])
return model
def squeezenet1_0_fc512(
num_classes, loss='softmax', pretrained=True, **kwargs
):
model = SqueezeNet(
num_classes,
loss,
version=1.0,
fc_dims=[512],
dropout_p=None,
**kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['squeezenet1_0'])
return model
def squeezenet1_1(num_classes, loss='softmax', pretrained=True, **kwargs):
model = SqueezeNet(
num_classes, loss, version=1.1, fc_dims=None, dropout_p=None, **kwargs
)
if pretrained:
init_pretrained_weights(model, model_urls['squeezenet1_1'])
return model
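# Hedged usage sketch (not part of the original file): version 1.1 trims the
# early layers relative to 1.0 while keeping the same 512-d pooled output.
def _squeezenet_demo():
    model = squeezenet1_1(num_classes=751, pretrained=False)
    model.eval()
    with torch.no_grad():
        v = model(torch.randn(2, 3, 256, 128))  # -> (2, 512)
    return v.shape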

View file

@ -0,0 +1,344 @@
from __future__ import division, absolute_import
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
__all__ = ['xception']
pretrained_settings = {
'xception': {
'imagenet': {
'url':
'http://data.lip6.fr/cadene/pretrainedmodels/xception-43020ad28.pth',
'input_space': 'RGB',
'input_size': [3, 299, 299],
'input_range': [0, 1],
'mean': [0.5, 0.5, 0.5],
'std': [0.5, 0.5, 0.5],
'num_classes': 1000,
            'scale': 0.8975  # resize the validation image to 333, then center-crop to 299x299
}
}
}
class SeparableConv2d(nn.Module):
def __init__(
self,
in_channels,
out_channels,
kernel_size=1,
stride=1,
padding=0,
dilation=1,
bias=False
):
super(SeparableConv2d, self).__init__()
self.conv1 = nn.Conv2d(
in_channels,
in_channels,
kernel_size,
stride,
padding,
dilation,
groups=in_channels,
bias=bias
)
self.pointwise = nn.Conv2d(
in_channels, out_channels, 1, 1, 0, 1, 1, bias=bias
)
def forward(self, x):
x = self.conv1(x)
x = self.pointwise(x)
return x
class Block(nn.Module):
def __init__(
self,
in_filters,
out_filters,
reps,
strides=1,
start_with_relu=True,
grow_first=True
):
super(Block, self).__init__()
if out_filters != in_filters or strides != 1:
self.skip = nn.Conv2d(
in_filters, out_filters, 1, stride=strides, bias=False
)
self.skipbn = nn.BatchNorm2d(out_filters)
else:
self.skip = None
self.relu = nn.ReLU(inplace=True)
rep = []
filters = in_filters
if grow_first:
rep.append(self.relu)
rep.append(
SeparableConv2d(
in_filters,
out_filters,
3,
stride=1,
padding=1,
bias=False
)
)
rep.append(nn.BatchNorm2d(out_filters))
filters = out_filters
for i in range(reps - 1):
rep.append(self.relu)
rep.append(
SeparableConv2d(
filters, filters, 3, stride=1, padding=1, bias=False
)
)
rep.append(nn.BatchNorm2d(filters))
if not grow_first:
rep.append(self.relu)
rep.append(
SeparableConv2d(
in_filters,
out_filters,
3,
stride=1,
padding=1,
bias=False
)
)
rep.append(nn.BatchNorm2d(out_filters))
if not start_with_relu:
rep = rep[1:]
else:
rep[0] = nn.ReLU(inplace=False)
if strides != 1:
rep.append(nn.MaxPool2d(3, strides, 1))
self.rep = nn.Sequential(*rep)
def forward(self, inp):
x = self.rep(inp)
if self.skip is not None:
skip = self.skip(inp)
skip = self.skipbn(skip)
else:
skip = inp
x += skip
return x
class Xception(nn.Module):
"""Xception.
Reference:
Chollet. Xception: Deep Learning with Depthwise
Separable Convolutions. CVPR 2017.
Public keys:
- ``xception``: Xception.
"""
def __init__(
self, num_classes, loss, fc_dims=None, dropout_p=None, **kwargs
):
super(Xception, self).__init__()
self.loss = loss
self.conv1 = nn.Conv2d(3, 32, 3, 2, 0, bias=False)
self.bn1 = nn.BatchNorm2d(32)
self.conv2 = nn.Conv2d(32, 64, 3, bias=False)
self.bn2 = nn.BatchNorm2d(64)
self.block1 = Block(
64, 128, 2, 2, start_with_relu=False, grow_first=True
)
self.block2 = Block(
128, 256, 2, 2, start_with_relu=True, grow_first=True
)
self.block3 = Block(
256, 728, 2, 2, start_with_relu=True, grow_first=True
)
self.block4 = Block(
728, 728, 3, 1, start_with_relu=True, grow_first=True
)
self.block5 = Block(
728, 728, 3, 1, start_with_relu=True, grow_first=True
)
self.block6 = Block(
728, 728, 3, 1, start_with_relu=True, grow_first=True
)
self.block7 = Block(
728, 728, 3, 1, start_with_relu=True, grow_first=True
)
self.block8 = Block(
728, 728, 3, 1, start_with_relu=True, grow_first=True
)
self.block9 = Block(
728, 728, 3, 1, start_with_relu=True, grow_first=True
)
self.block10 = Block(
728, 728, 3, 1, start_with_relu=True, grow_first=True
)
self.block11 = Block(
728, 728, 3, 1, start_with_relu=True, grow_first=True
)
self.block12 = Block(
728, 1024, 2, 2, start_with_relu=True, grow_first=False
)
self.conv3 = SeparableConv2d(1024, 1536, 3, 1, 1)
self.bn3 = nn.BatchNorm2d(1536)
self.conv4 = SeparableConv2d(1536, 2048, 3, 1, 1)
self.bn4 = nn.BatchNorm2d(2048)
self.global_avgpool = nn.AdaptiveAvgPool2d(1)
self.feature_dim = 2048
self.fc = self._construct_fc_layer(fc_dims, 2048, dropout_p)
self.classifier = nn.Linear(self.feature_dim, num_classes)
self._init_params()
def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
"""Constructs fully connected layer.
Args:
fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
input_dim (int): input dimension
dropout_p (float): dropout probability, if None, dropout is unused
"""
if fc_dims is None:
self.feature_dim = input_dim
return None
assert isinstance(
fc_dims, (list, tuple)
), 'fc_dims must be either list or tuple, but got {}'.format(
type(fc_dims)
)
layers = []
for dim in fc_dims:
layers.append(nn.Linear(input_dim, dim))
layers.append(nn.BatchNorm1d(dim))
layers.append(nn.ReLU(inplace=True))
if dropout_p is not None:
layers.append(nn.Dropout(p=dropout_p))
input_dim = dim
self.feature_dim = fc_dims[-1]
return nn.Sequential(*layers)
def _init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu'
)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm1d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def featuremaps(self, input):
x = self.conv1(input)
x = self.bn1(x)
x = F.relu(x, inplace=True)
x = self.conv2(x)
x = self.bn2(x)
x = F.relu(x, inplace=True)
x = self.block1(x)
x = self.block2(x)
x = self.block3(x)
x = self.block4(x)
x = self.block5(x)
x = self.block6(x)
x = self.block7(x)
x = self.block8(x)
x = self.block9(x)
x = self.block10(x)
x = self.block11(x)
x = self.block12(x)
x = self.conv3(x)
x = self.bn3(x)
x = F.relu(x, inplace=True)
x = self.conv4(x)
x = self.bn4(x)
x = F.relu(x, inplace=True)
return x
def forward(self, x):
f = self.featuremaps(x)
v = self.global_avgpool(f)
v = v.view(v.size(0), -1)
if self.fc is not None:
v = self.fc(v)
if not self.training:
return v
y = self.classifier(v)
if self.loss == 'softmax':
return y
elif self.loss == 'triplet':
return y, v
else:
raise KeyError('Unsupported loss: {}'.format(self.loss))
def init_pretrained_weights(model, model_url):
"""Initialize models with pretrained weights.
Layers that don't match with pretrained layers in name or size are kept unchanged.
"""
pretrain_dict = model_zoo.load_url(model_url)
model_dict = model.state_dict()
pretrain_dict = {
k: v
for k, v in pretrain_dict.items()
if k in model_dict and model_dict[k].size() == v.size()
}
model_dict.update(pretrain_dict)
model.load_state_dict(model_dict)
def xception(num_classes, loss='softmax', pretrained=True, **kwargs):
model = Xception(num_classes, loss, fc_dims=None, dropout_p=None, **kwargs)
if pretrained:
model_url = pretrained_settings['xception']['imagenet']['url']
init_pretrained_weights(model, model_url)
return model
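# Hedged usage sketch (illustrative; `torch` is imported locally since this
# file only imports submodules): the ImageNet weights above expect 299x299
# inputs with 0.5/0.5 normalization, but any reasonably sized crop works;
# eval mode yields 2048-d embeddings.
def _xception_demo():
    import torch
    model = xception(num_classes=751, pretrained=False)
    model.eval()
    with torch.no_grad():
        v = model(torch.randn(2, 3, 256, 128))  # -> (2, 2048)
    return v.shape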

View file

@ -0,0 +1,215 @@
import sys
import time
import warnings
import torch
from collections import OrderedDict
__model_types = [
'resnet50', 'mlfn', 'hacnn', 'mobilenetv2_x1_0', 'mobilenetv2_x1_4',
'osnet_x1_0', 'osnet_x0_75', 'osnet_x0_5', 'osnet_x0_25',
'osnet_ibn_x1_0', 'osnet_ain_x1_0']
__trained_urls = {
# market1501 models ########################################################
'resnet50_market1501.pt':
'https://drive.google.com/uc?id=1dUUZ4rHDWohmsQXCRe2C_HbYkzz94iBV',
'resnet50_dukemtmcreid.pt':
'https://drive.google.com/uc?id=17ymnLglnc64NRvGOitY3BqMRS9UWd1wg',
'resnet50_msmt17.pt':
'https://drive.google.com/uc?id=1ep7RypVDOthCRIAqDnn4_N-UhkkFHJsj',
'resnet50_fc512_market1501.pt':
'https://drive.google.com/uc?id=1kv8l5laX_YCdIGVCetjlNdzKIA3NvsSt',
'resnet50_fc512_dukemtmcreid.pt':
'https://drive.google.com/uc?id=13QN8Mp3XH81GK4BPGXobKHKyTGH50Rtx',
'resnet50_fc512_msmt17.pt':
'https://drive.google.com/uc?id=1fDJLcz4O5wxNSUvImIIjoaIF9u1Rwaud',
'mlfn_market1501.pt':
'https://drive.google.com/uc?id=1wXcvhA_b1kpDfrt9s2Pma-MHxtj9pmvS',
'mlfn_dukemtmcreid.pt':
'https://drive.google.com/uc?id=1rExgrTNb0VCIcOnXfMsbwSUW1h2L1Bum',
'mlfn_msmt17.pt':
'https://drive.google.com/uc?id=18JzsZlJb3Wm7irCbZbZ07TN4IFKvR6p-',
'hacnn_market1501.pt':
'https://drive.google.com/uc?id=1LRKIQduThwGxMDQMiVkTScBwR7WidmYF',
'hacnn_dukemtmcreid.pt':
'https://drive.google.com/uc?id=1zNm6tP4ozFUCUQ7Sv1Z98EAJWXJEhtYH',
'hacnn_msmt17.pt':
'https://drive.google.com/uc?id=1MsKRtPM5WJ3_Tk2xC0aGOO7pM3VaFDNZ',
'mobilenetv2_x1_0_market1501.pt':
'https://drive.google.com/uc?id=18DgHC2ZJkjekVoqBWszD8_Xiikz-fewp',
'mobilenetv2_x1_0_dukemtmcreid.pt':
'https://drive.google.com/uc?id=1q1WU2FETRJ3BXcpVtfJUuqq4z3psetds',
'mobilenetv2_x1_0_msmt17.pt':
'https://drive.google.com/uc?id=1j50Hv14NOUAg7ZeB3frzfX-WYLi7SrhZ',
'mobilenetv2_x1_4_market1501.pt':
'https://drive.google.com/uc?id=1t6JCqphJG-fwwPVkRLmGGyEBhGOf2GO5',
'mobilenetv2_x1_4_dukemtmcreid.pt':
'https://drive.google.com/uc?id=12uD5FeVqLg9-AFDju2L7SQxjmPb4zpBN',
'mobilenetv2_x1_4_msmt17.pt':
'https://drive.google.com/uc?id=1ZY5P2Zgm-3RbDpbXM0kIBMPvspeNIbXz',
'osnet_x1_0_market1501.pt':
'https://drive.google.com/uc?id=1vduhq5DpN2q1g4fYEZfPI17MJeh9qyrA',
'osnet_x1_0_dukemtmcreid.pt':
'https://drive.google.com/uc?id=1QZO_4sNf4hdOKKKzKc-TZU9WW1v6zQbq',
'osnet_x1_0_msmt17.pt':
'https://drive.google.com/uc?id=112EMUfBPYeYg70w-syK6V6Mx8-Qb9Q1M',
'osnet_x0_75_market1501.pt':
'https://drive.google.com/uc?id=1ozRaDSQw_EQ8_93OUmjDbvLXw9TnfPer',
'osnet_x0_75_dukemtmcreid.pt':
'https://drive.google.com/uc?id=1IE3KRaTPp4OUa6PGTFL_d5_KQSJbP0Or',
'osnet_x0_75_msmt17.pt':
'https://drive.google.com/uc?id=1QEGO6WnJ-BmUzVPd3q9NoaO_GsPNlmWc',
'osnet_x0_5_market1501.pt':
'https://drive.google.com/uc?id=1PLB9rgqrUM7blWrg4QlprCuPT7ILYGKT',
'osnet_x0_5_dukemtmcreid.pt':
'https://drive.google.com/uc?id=1KoUVqmiST175hnkALg9XuTi1oYpqcyTu',
'osnet_x0_5_msmt17.pt':
'https://drive.google.com/uc?id=1UT3AxIaDvS2PdxzZmbkLmjtiqq7AIKCv',
'osnet_x0_25_market1501.pt':
'https://drive.google.com/uc?id=1z1UghYvOTtjx7kEoRfmqSMu-z62J6MAj',
'osnet_x0_25_dukemtmcreid.pt':
'https://drive.google.com/uc?id=1eumrtiXT4NOspjyEV4j8cHmlOaaCGk5l',
'osnet_x0_25_msmt17.pt':
'https://drive.google.com/uc?id=1sSwXSUlj4_tHZequ_iZ8w_Jh0VaRQMqF',
    # msmt17 models ############################################################
    # (these keys repeat msmt17 entries above; the later definitions take effect)
'resnet50_msmt17.pt':
'https://drive.google.com/uc?id=1yiBteqgIZoOeywE8AhGmEQl7FTVwrQmf',
'osnet_x1_0_msmt17.pt':
'https://drive.google.com/uc?id=1IosIFlLiulGIjwW3H8uMRmx3MzPwf86x',
'osnet_x0_75_msmt17.pt':
'https://drive.google.com/uc?id=1fhjSS_7SUGCioIf2SWXaRGPqIY9j7-uw',
'osnet_x0_5_msmt17.pt':
'https://drive.google.com/uc?id=1DHgmb6XV4fwG3n-CnCM0zdL9nMsZ9_RF',
'osnet_x0_25_msmt17.pt':
'https://drive.google.com/uc?id=1Kkx2zW89jq_NETu4u42CFZTMVD5Hwm6e',
'osnet_ibn_x1_0_msmt17.pt':
'https://drive.google.com/uc?id=1q3Sj2ii34NlfxA4LvmHdWO_75NDRmECJ',
'osnet_ain_x1_0_msmt17.pt':
'https://drive.google.com/uc?id=1SigwBE6mPdqiJMqhuIY4aqC7--5CsMal',
}
def show_downloadeable_models():
print('\nAvailable .pt ReID models for automatic download')
print(list(__trained_urls.keys()))
def get_model_url(model):
    if model.name in __trained_urls:
        return __trained_urls[model.name]
    else:
        return None
def is_model_in_model_types(model):
    return model.name in __model_types
def get_model_name(model):
for x in __model_types:
if x in model.name:
return x
return None
def download_url(url, dst):
"""Downloads file from a url to a destination.
Args:
url (str): url to download file.
dst (str): destination path.
"""
from six.moves import urllib
print('* url="{}"'.format(url))
print('* destination="{}"'.format(dst))
def _reporthook(count, block_size, total_size):
global start_time
if count == 0:
start_time = time.time()
return
duration = time.time() - start_time
progress_size = int(count * block_size)
speed = int(progress_size / (1024*duration))
percent = int(count * block_size * 100 / total_size)
sys.stdout.write(
'\r...%d%%, %d MB, %d KB/s, %d seconds passed' %
(percent, progress_size / (1024*1024), speed, duration)
)
sys.stdout.flush()
urllib.request.urlretrieve(url, dst, _reporthook)
sys.stdout.write('\n')
def load_pretrained_weights(model, weight_path):
r"""Loads pretrianed weights to model.
Features::
- Incompatible layers (unmatched in name or size) will be ignored.
- Can automatically deal with keys containing "module.".
Args:
model (nn.Module): network model.
weight_path (str): path to pretrained weights.
Examples::
>>> from torchreid.utils import load_pretrained_weights
>>> weight_path = 'log/my_model/model-best.pth.tar'
>>> load_pretrained_weights(model, weight_path)
"""
checkpoint = torch.load(weight_path)
if 'state_dict' in checkpoint:
state_dict = checkpoint['state_dict']
else:
state_dict = checkpoint
model_dict = model.state_dict()
new_state_dict = OrderedDict()
matched_layers, discarded_layers = [], []
for k, v in state_dict.items():
if k.startswith('module.'):
k = k[7:] # discard module.
if k in model_dict and model_dict[k].size() == v.size():
new_state_dict[k] = v
matched_layers.append(k)
else:
discarded_layers.append(k)
model_dict.update(new_state_dict)
model.load_state_dict(model_dict)
if len(matched_layers) == 0:
warnings.warn(
'The pretrained weights "{}" cannot be loaded, '
'please check the key names manually '
'(** ignored and continue **)'.format(weight_path)
)
else:
print(
'Successfully loaded pretrained weights from "{}"'.
format(weight_path)
)
if len(discarded_layers) > 0:
print(
'** The following layers are discarded '
'due to unmatched keys or layer size: {}'.
format(discarded_layers)
)
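# Hedged end-to-end sketch (not in the original file; the destination path
# and the `model` argument are hypothetical): resolve a weight filename to
# its download URL, fetch it, then load it leniently with the helper above.
def _fetch_and_load_demo(model, name='osnet_x1_0_market1501.pt'):
    url = __trained_urls.get(name)
    if url is None:
        show_downloadeable_models()  # list valid weight filenames
        return
    dst = '/tmp/' + name
    download_url(url, dst)
    load_pretrained_weights(model, dst)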