# D:/workplace/python
# -*- coding: utf-8 -*-
# @File     : resnext.py
# @Author   : Guido LuXiaohao
# @Date     : 2020/4/8
# @Software : PyCharm
'''ResNeXt models for Keras.

This is a revised implementation from Somshubra Majumdar's SENet repo:
(https://github.com/titu1994/keras-squeeze-excite-network)

# Reference
- [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/pdf/1611.05431.pdf)
'''
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division

import warnings

from keras.models import Model
from keras.layers.core import Dense, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import GlobalAveragePooling2D, GlobalMaxPooling2D, MaxPooling2D
from keras.layers import Input
from keras.layers.merge import concatenate, add
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras.utils.layer_utils import convert_all_kernels_in_model
from keras.utils.data_utils import get_file
from keras.engine.topology import get_source_inputs
from keras_applications.imagenet_utils import _obtain_input_shape
import keras.backend as K

from nets.attention_module.se_cbam import attach_attention_module

CIFAR_TH_WEIGHTS_PATH = ''
CIFAR_TF_WEIGHTS_PATH = ''
CIFAR_TH_WEIGHTS_PATH_NO_TOP = ''
CIFAR_TF_WEIGHTS_PATH_NO_TOP = ''

IMAGENET_TH_WEIGHTS_PATH = ''
IMAGENET_TF_WEIGHTS_PATH = ''
IMAGENET_TH_WEIGHTS_PATH_NO_TOP = ''
IMAGENET_TF_WEIGHTS_PATH_NO_TOP = ''
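
# NOTE: the weight-path constants above are empty placeholders in this revision, so
# `weights='cifar10'` / `weights='imagenet'` only validate the arguments; no
# pre-trained weights are downloaded or loaded anywhere in this module.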


def ResNext(input_shape=None,
            depth=29,
            cardinality=8,
            width=64,
            weight_decay=5e-4,
            include_top=True,
            weights=None,
            input_tensor=None,
            pooling=None,
            classes=10,
            attention_module=None):
    """Instantiates the ResNeXt architecture.

    Note that when using TensorFlow, for best performance you should set
    `image_data_format="channels_last"` in your Keras config
    at ~/.keras/keras.json.

    The model is compatible with both TensorFlow and Theano. The dimension
    ordering convention used by the model is the one specified in your Keras
    config file.

    # Arguments
        depth: number of layers in the ResNeXt model. Can be an
            integer or a list of integers.
        cardinality: the size of the set of transformations.
        width: multiplier to the ResNeXt width (number of filters).
        weight_decay: weight decay (l2 norm).
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: `None` (random initialization) or `cifar10`
            (pre-training on CIFAR-10).
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(32, 32, 3)` (with `channels_last` dim ordering)
            or `(3, 32, 32)` (with `channels_first` dim ordering)).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 8.
            E.g. `(200, 200, 3)` would be one valid value.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        attention_module: optional attention module attached inside each
            bottleneck block (see `attach_attention_module`), or `None`.

    # Returns
        A Keras model instance.
    """

    if weights not in {'cifar10', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `cifar10` '
                         '(pre-training on CIFAR-10).')

    if weights == 'cifar10' and include_top and classes != 10:
        raise ValueError('If using `weights` as CIFAR-10 with `include_top`'
                         ' as true, `classes` should be 10')

    if type(depth) == int:
        if (depth - 2) % 9 != 0:
            raise ValueError('Depth of the network must be such that (depth - 2) '
                             'is divisible by 9.')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=32,
                                      min_size=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    x = __create_res_next(classes, img_input, include_top, depth, cardinality, width,
                          weight_decay, pooling, attention_module)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='resnext')

    return model
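

# Example (a minimal usage sketch; the values mirror the defaults of the signature
# above and assume the `nets.attention_module.se_cbam` import at the top resolves):
#
#     model = ResNext(input_shape=(32, 32, 3), depth=29, cardinality=8,
#                     width=64, classes=10)
#     model.summary()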


def ResNextImageNet(input_shape=None,
                    depth=[3, 4, 6, 3],
                    cardinality=32,
                    width=4,
                    weight_decay=5e-4,
                    include_top=True,
                    weights=None,
                    input_tensor=None,
                    pooling=None,
                    classes=1000,
                    attention_module=None):
    """Instantiates the ResNeXt architecture for the ImageNet dataset.

    Note that when using TensorFlow, for best performance you should set
    `image_data_format="channels_last"` in your Keras config
    at ~/.keras/keras.json.

    The model is compatible with both TensorFlow and Theano. The dimension
    ordering convention used by the model is the one specified in your Keras
    config file.

    # Arguments
        depth: number of layers in each block, defined as a list.
            ResNeXt-50 can be defined as [3, 4, 6, 3].
            ResNeXt-101 can be defined as [3, 4, 23, 3].
            Default is ResNeXt-50.
        cardinality: the size of the set of transformations.
        width: multiplier to the ResNeXt width (number of filters).
        weight_decay: weight decay (l2 norm).
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: `None` (random initialization) or `imagenet` (trained
            on ImageNet).
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` dim ordering)
            or `(3, 224, 224)` (with `channels_first` dim ordering)).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 112.
            E.g. `(200, 200, 3)` would be one valid value.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        attention_module: optional attention module attached inside each
            bottleneck block (see `attach_attention_module`), or `None`.

    # Returns
        A Keras model instance.
    """

    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as imagenet with `include_top`'
                         ' as true, `classes` should be 1000')

    if type(depth) == int and (depth - 2) % 9 != 0:
        raise ValueError('Depth of the network must be such that (depth - 2) '
                         'is divisible by 9.')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=112,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    x = __create_res_next_imagenet(classes, img_input, include_top, depth, cardinality, width,
                                   weight_decay, pooling, attention_module)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='resnext')

    return model
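

# Example (a minimal usage sketch for the ImageNet variant; values are illustrative
# and match the ResNeXt-50 defaults of the signature above):
#
#     model = ResNextImageNet(input_shape=(224, 224, 3), depth=[3, 4, 6, 3],
#                             cardinality=32, width=4, classes=1000)
#     model.summary()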


def __initial_conv_block(input, weight_decay=5e-4):
    ''' Adds an initial convolution block, with batch normalization and leaky ReLU activation
    Args:
        input: input tensor
        weight_decay: weight decay factor
    Returns: a keras tensor
    '''
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    x = Conv2D(64, (3, 3), padding='same', use_bias=False, kernel_initializer='he_normal',
               kernel_regularizer=l2(weight_decay))(input)
    x = BatchNormalization(axis=channel_axis)(x)
    x = LeakyReLU()(x)

    return x


def __initial_conv_block_inception(input, weight_decay=5e-4):
    ''' Adds the initial conv block for the ImageNet ResNeXt: a strided 7x7 convolution
    with batch norm, leaky ReLU and max pooling
    Args:
        input: input tensor
        weight_decay: weight decay factor
    Returns: a keras tensor
    '''
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    x = Conv2D(64, (7, 7), padding='same', use_bias=False, kernel_initializer='he_normal',
               kernel_regularizer=l2(weight_decay), strides=(2, 2))(input)
    x = BatchNormalization(axis=channel_axis)(x)
    x = LeakyReLU()(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

    return x


def __grouped_convolution_block(input, grouped_channels, cardinality, strides, weight_decay=5e-4):
    ''' Adds a grouped convolution block, equivalent to the grouped convolution described in the paper
    Args:
        input: input tensor
        grouped_channels: number of filters per group
        cardinality: cardinality factor describing the number of groups
        strides: performs strided convolution for downscaling if > 1
        weight_decay: weight decay term
    Returns: a keras tensor
    '''
    init = input
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    group_list = []

    if cardinality == 1:
        # with cardinality 1, it is a standard convolution
        x = Conv2D(grouped_channels, (3, 3), padding='same', use_bias=False, strides=(strides, strides),
                   kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(init)
        x = BatchNormalization(axis=channel_axis)(x)
        x = LeakyReLU()(x)
        return x

    for c in range(cardinality):
        # Slice out the c-th group of channels; `c` is bound as a default argument so
        # each Lambda keeps its own group index, and the channels_first branch returns
        # the sliced tensor directly.
        x = Lambda(lambda z, c=c: z[:, :, :, c * grouped_channels:(c + 1) * grouped_channels]
                   if K.image_data_format() == 'channels_last' else
                   z[:, c * grouped_channels:(c + 1) * grouped_channels, :, :])(input)

        x = Conv2D(grouped_channels, (3, 3), padding='same', use_bias=False, strides=(strides, strides),
                   kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(x)

        group_list.append(x)

    group_merge = concatenate(group_list, axis=channel_axis)
    x = BatchNormalization(axis=channel_axis)(group_merge)
    x = LeakyReLU()(x)

    return x
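

# For illustration: with filters=64 and cardinality=8, each bottleneck passes
# grouped_channels = 64 // 8 = 8 channels to each of the 8 branches above, and the
# concatenation restores the full 64-channel tensor before batch normalization.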


def __bottleneck_block(input, filters=64, cardinality=8, strides=1, weight_decay=5e-4, attention_module=None):
    ''' Adds a bottleneck block
    Args:
        input: input tensor
        filters: number of output filters
        cardinality: cardinality factor describing the number of
            grouped convolutions
        strides: performs strided convolution for downsampling if > 1
        weight_decay: weight decay factor
        attention_module: optional attention module applied before the residual addition
    Returns: a keras tensor
    '''
    init = input

    grouped_channels = int(filters / cardinality)
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # If the number of input channels differs from the block output width (2 * filters),
    # project the shortcut with a 1x1 convolution (strided when downsampling) so the
    # residual addition is valid.
    if K.image_data_format() == 'channels_first':
        if init._keras_shape[1] != 2 * filters:
            init = Conv2D(filters * 2, (1, 1), padding='same', strides=(strides, strides),
                          use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(init)
            init = BatchNormalization(axis=channel_axis)(init)
    else:
        if init._keras_shape[-1] != 2 * filters:
            init = Conv2D(filters * 2, (1, 1), padding='same', strides=(strides, strides),
                          use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(init)
            init = BatchNormalization(axis=channel_axis)(init)

    x = Conv2D(filters, (1, 1), padding='same', use_bias=False,
               kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(input)
    x = BatchNormalization(axis=channel_axis)(x)
    x = LeakyReLU()(x)

    x = __grouped_convolution_block(x, grouped_channels, cardinality, strides, weight_decay)

    x = Conv2D(filters * 2, (1, 1), padding='same', use_bias=False, kernel_initializer='he_normal',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(axis=channel_axis)(x)

    # Optionally attach the attention module to the residual branch.
    if attention_module is not None:
        x = attach_attention_module(x, attention_module)

    x = add([init, x])
    x = LeakyReLU()(x)

    return x
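

# For illustration, with the defaults (filters=64, cardinality=8): the first 1x1 conv
# reduces the input to 64 channels, the grouped 3x3 conv keeps 8 groups of 8 channels,
# and the final 1x1 conv expands to 128 (= 2 * filters), matching the (possibly
# projected) shortcut before the addition.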


def __create_res_next(nb_classes, img_input, include_top, depth=29, cardinality=8, width=4,
                      weight_decay=5e-4, pooling=None, attention_module=None):
    ''' Creates a ResNeXt model with specified parameters
    Args:
        nb_classes: Number of output classes
        img_input: Input tensor or layer
        include_top: Flag to include the last dense layer
        depth: Depth of the network. Can be a positive integer or a list.
            Compute N = (depth - 2) / 9.
            For a depth of 56, N = (56 - 2) / 9 = 6
            For a depth of 101, N = (101 - 2) / 9 = 11
        cardinality: the size of the set of transformations.
            Increasing cardinality improves classification accuracy.
        width: Width of the network.
        weight_decay: weight decay (l2 norm)
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        attention_module: optional attention module passed to each bottleneck block
    Returns: a Keras tensor
    '''
    if type(depth) is list or type(depth) is tuple:
        # If a list is provided, defer to user how many blocks are present
        N = list(depth)
    else:
        # Otherwise, default to 3 blocks each of default number of group convolution blocks
        N = [(depth - 2) // 9 for _ in range(3)]

    filters = cardinality * width
    filters_list = []

    for i in range(len(N)):
        filters_list.append(filters)
        filters *= 2  # double the size of the filters

    x = __initial_conv_block(img_input, weight_decay)

    # block 1 (no pooling)
    for i in range(N[0]):
        x = __bottleneck_block(x, filters_list[0], cardinality, strides=1,
                               weight_decay=weight_decay, attention_module=attention_module)

    N = N[1:]  # remove the first block from block definition list
    filters_list = filters_list[1:]  # remove the first filter from the filter list

    # block 2 to N
    for block_idx, n_i in enumerate(N):
        for i in range(n_i):
            if i == 0:
                x = __bottleneck_block(x, filters_list[block_idx], cardinality, strides=2,
                                       weight_decay=weight_decay, attention_module=attention_module)
            else:
                x = __bottleneck_block(x, filters_list[block_idx], cardinality, strides=1,
                                       weight_decay=weight_decay, attention_module=attention_module)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        x = Dense(nb_classes, use_bias=False, kernel_regularizer=l2(weight_decay),
                  kernel_initializer='he_normal', activation='softmax')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    return x


def __create_res_next_imagenet(nb_classes, img_input, include_top, depth, cardinality=32, width=4,
                               weight_decay=5e-4, pooling=None, attention_module=None):
    ''' Creates a ResNeXt model with specified parameters
    Args:
        nb_classes: Number of output classes
        img_input: Input tensor or layer
        include_top: Flag to include the last dense layer
        depth: Depth of the network. List of integers.
        cardinality: the size of the set of transformations.
            Increasing cardinality improves classification accuracy.
        width: Width of the network.
        weight_decay: weight decay (l2 norm)
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        attention_module: optional attention module passed to each bottleneck block
    Returns: a Keras tensor
    '''
    if type(depth) is list or type(depth) is tuple:
        # If a list is provided, defer to user how many blocks are present
        N = list(depth)
    else:
        # Otherwise, default to 3 blocks each of default number of group convolution blocks
        N = [(depth - 2) // 9 for _ in range(3)]

    filters = cardinality * width
    filters_list = []

    for i in range(len(N)):
        filters_list.append(filters)
        filters *= 2  # double the size of the filters

    x = __initial_conv_block_inception(img_input, weight_decay)

    # block 1 (no pooling)
    for i in range(N[0]):
        x = __bottleneck_block(x, filters_list[0], cardinality, strides=1,
                               weight_decay=weight_decay, attention_module=attention_module)

    N = N[1:]  # remove the first block from block definition list
    filters_list = filters_list[1:]  # remove the first filter from the filter list

    # block 2 to N
    for block_idx, n_i in enumerate(N):
        for i in range(n_i):
            if i == 0:
                x = __bottleneck_block(x, filters_list[block_idx], cardinality, strides=2,
                                       weight_decay=weight_decay, attention_module=attention_module)
            else:
                x = __bottleneck_block(x, filters_list[block_idx], cardinality, strides=1,
                                       weight_decay=weight_decay, attention_module=attention_module)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        x = Dense(nb_classes, use_bias=False, kernel_regularizer=l2(weight_decay),
                  kernel_initializer='he_normal', activation='softmax')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    return x
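

if __name__ == '__main__':
    # Minimal smoke-test sketch: build the default CIFAR-10 ResNeXt-29 and print its
    # layer summary. This assumes the `nets.attention_module.se_cbam` import at the
    # top of this module resolves; it only constructs the graph and trains nothing.
    model = ResNext(input_shape=(32, 32, 3), depth=29, cardinality=8, width=64,
                    classes=10, attention_module=None)
    model.summary()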