# D:/workplace/python
# -*- coding: utf-8 -*-
# @File    : mobilenetV1.py
# @Author  : Guido LuXiaohao
# @Date    : 2020/4/8
# @Software: PyCharm
"""MobileNet v1 models for Keras.

This is a revised implementation from Somshubra Majumdar's SENet repo:
(https://github.com/titu1994/keras-squeeze-excite-network)

# Reference
- [MobileNets: Efficient Convolutional Neural Networks for
   Mobile Vision Applications](https://arxiv.org/pdf/1704.04861.pdf)
"""
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division

import warnings

from keras.models import Model
from keras.layers import Input
from keras.layers import Activation
from keras.layers import Dropout
from keras.layers import Reshape
from keras.layers import BatchNormalization
from keras.layers import GlobalAveragePooling2D
from keras.layers import GlobalMaxPooling2D
from keras.layers import Conv2D
from keras import initializers
from keras import regularizers
from keras import constraints
from keras.utils import conv_utils
from keras.utils.data_utils import get_file
from keras.engine.topology import get_source_inputs
from keras.engine import InputSpec
from keras.applications import imagenet_utils
from keras_applications.imagenet_utils import _obtain_input_shape
from keras.applications.imagenet_utils import decode_predictions
from keras import backend as K

from nets.attention_module.se_cbam import attach_attention_module


def relu6(x):
    return K.relu(x, max_value=6)


def preprocess_input(x):
    """Preprocesses a numpy array encoding a batch of images.

    # Arguments
        x: a 4D numpy array consisting of RGB values within [0, 255].

    # Returns
        Preprocessed array.
    """
    return imagenet_utils.preprocess_input(x, mode='tf')
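

# Usage sketch (kept as a comment so importing this module has no side
# effects): with `mode='tf'`, pixel values are rescaled from [0, 255] to
# [-1, 1]. The batch shape below is an arbitrary example.
#
#     import numpy as np
#     batch = np.random.randint(0, 256, size=(2, 224, 224, 3)).astype('float32')
#     batch = preprocess_input(batch)  # values now lie in [-1, 1]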


class DepthwiseConv2D(Conv2D):
    """Depthwise separable 2D convolution.

    Depthwise separable convolutions consist of performing just the first
    step of a depthwise spatial convolution (which acts on each input
    channel separately). The `depth_multiplier` argument controls how many
    output channels are generated per input channel in the depthwise step.

    # Arguments
        kernel_size: An integer or tuple/list of 2 integers, specifying the
            width and height of the 2D convolution window.
            Can be a single integer to specify the same value for
            all spatial dimensions.
        strides: An integer or tuple/list of 2 integers,
            specifying the strides of the convolution along the width and height.
            Can be a single integer to specify the same value for
            all spatial dimensions.
            Specifying any stride value != 1 is incompatible with specifying
            any `dilation_rate` value != 1.
        padding: one of `'valid'` or `'same'` (case-insensitive).
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel.
            The total number of depthwise convolution output
            channels will be equal to `filters_in * depth_multiplier`.
        data_format: A string,
            one of `channels_last` (default) or `channels_first`.
            The ordering of the dimensions in the inputs.
            `channels_last` corresponds to inputs with shape
            `(batch, height, width, channels)` while `channels_first`
            corresponds to inputs with shape
            `(batch, channels, height, width)`.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be 'channels_last'.
        activation: Activation function to use
            (see [activations](../activations.md)).
            If you don't specify anything, no activation is applied
            (ie. 'linear' activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        depthwise_initializer: Initializer for the depthwise kernel matrix
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        depthwise_regularizer: Regularizer function applied to
            the depthwise kernel matrix
            (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer function applied to the bias vector
            (see [regularizer](../regularizers.md)).
        activity_regularizer: Regularizer function applied to
            the output of the layer (its 'activation')
            (see [regularizer](../regularizers.md)).
        depthwise_constraint: Constraint function applied to
            the depthwise kernel matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint function applied to the bias vector
            (see [constraints](../constraints.md)).

    # Input shape
        4D tensor with shape:
        `[batch, channels, rows, cols]` if data_format='channels_first'
        or 4D tensor with shape:
        `[batch, rows, cols, channels]` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `[batch, filters, new_rows, new_cols]` if data_format='channels_first'
        or 4D tensor with shape:
        `[batch, new_rows, new_cols, filters]` if data_format='channels_last'.
        `rows` and `cols` values might have changed due to padding.
    """

    def __init__(self,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 depth_multiplier=1,
                 data_format=None,
                 activation=None,
                 use_bias=True,
                 depthwise_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 depthwise_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 depthwise_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        super(DepthwiseConv2D, self).__init__(
            filters=None,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            activation=activation,
            use_bias=use_bias,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            bias_constraint=bias_constraint,
            **kwargs)
        self.depth_multiplier = depth_multiplier
        self.depthwise_initializer = initializers.get(depthwise_initializer)
        self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
        self.depthwise_constraint = constraints.get(depthwise_constraint)
        self.bias_initializer = initializers.get(bias_initializer)

    def build(self, input_shape):
        if len(input_shape) < 4:
            raise ValueError('Inputs to `DepthwiseConv2D` should have rank 4. '
                             'Received input shape:', str(input_shape))
        if self.data_format == 'channels_first':
            channel_axis = 1
        else:
            channel_axis = 3
        if input_shape[channel_axis] is None:
            raise ValueError('The channel dimension of the inputs to '
                             '`DepthwiseConv2D` '
                             'should be defined. Found `None`.')
        input_dim = int(input_shape[channel_axis])
        depthwise_kernel_shape = (self.kernel_size[0],
                                  self.kernel_size[1],
                                  input_dim,
                                  self.depth_multiplier)

        self.depthwise_kernel = self.add_weight(
            shape=depthwise_kernel_shape,
            initializer=self.depthwise_initializer,
            name='depthwise_kernel',
            regularizer=self.depthwise_regularizer,
            constraint=self.depthwise_constraint)

        if self.use_bias:
            self.bias = self.add_weight(shape=(input_dim * self.depth_multiplier,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        else:
            self.bias = None
        # Set input spec.
        self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim})
        self.built = True

    def call(self, inputs, training=None):
        outputs = K.depthwise_conv2d(
            inputs,
            self.depthwise_kernel,
            strides=self.strides,
            padding=self.padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format)

        # Check the boolean flag rather than the bias variable itself.
        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)

        return outputs

    def compute_output_shape(self, input_shape):
        if self.data_format == 'channels_first':
            rows = input_shape[2]
            cols = input_shape[3]
            out_filters = input_shape[1] * self.depth_multiplier
        elif self.data_format == 'channels_last':
            rows = input_shape[1]
            cols = input_shape[2]
            out_filters = input_shape[3] * self.depth_multiplier

        rows = conv_utils.conv_output_length(rows, self.kernel_size[0],
                                             self.padding,
                                             self.strides[0])
        cols = conv_utils.conv_output_length(cols, self.kernel_size[1],
                                             self.padding,
                                             self.strides[1])

        if self.data_format == 'channels_first':
            return (input_shape[0], out_filters, rows, cols)
        elif self.data_format == 'channels_last':
            return (input_shape[0], rows, cols, out_filters)

    def get_config(self):
        config = super(DepthwiseConv2D, self).get_config()
        config.pop('filters')
        config.pop('kernel_initializer')
        config.pop('kernel_regularizer')
        config.pop('kernel_constraint')
        config['depth_multiplier'] = self.depth_multiplier
        config['depthwise_initializer'] = initializers.serialize(self.depthwise_initializer)
        config['depthwise_regularizer'] = regularizers.serialize(self.depthwise_regularizer)
        config['depthwise_constraint'] = constraints.serialize(self.depthwise_constraint)
        return config
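

# Shape sketch for the layer above (comment only; the sizes are arbitrary
# example values): with 16 input channels and `depth_multiplier=2`, the
# depthwise kernel has shape (3, 3, 16, 2) and the layer emits
# 16 * 2 = 32 output channels.
#
#     x_in = Input(shape=(56, 56, 16))
#     y = DepthwiseConv2D((3, 3), padding='same', depth_multiplier=2,
#                         use_bias=False)(x_in)
#     # y has shape (None, 56, 56, 32)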


def MobileNet(input_shape=None,
              alpha=1.0,
              depth_multiplier=1,
              dropout=1e-3,
              include_top=True,
              weights=None,
              input_tensor=None,
              pooling=None,
              classes=1000,
              attention_module=None):
    """Instantiates the MobileNet architecture, optionally attaching an
    attention module (e.g. SE or CBAM) to each depthwise block.

    Note that only TensorFlow is supported for now,
    therefore it only works with the data format
    `image_data_format='channels_last'` in your Keras config
    at `~/.keras/keras.json`.

    To load a MobileNet model via `load_model`, import the custom
    objects `relu6` and `DepthwiseConv2D` and pass them to the
    `custom_objects` parameter.
    E.g.
    model = load_model('mobilenet.h5', custom_objects={
                       'relu6': mobilenet.relu6,
                       'DepthwiseConv2D': mobilenet.DepthwiseConv2D})

    # Arguments
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or (3, 224, 224) (with `channels_first` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 32.
            E.g. `(200, 200, 3)` would be one valid value.
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                are used at each layer.
        depth_multiplier: depth multiplier for depthwise convolution,
            i.e. the number of depthwise output channels per input channel.
        dropout: dropout rate applied before the classifier.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: `None` (random initialization) or
            `imagenet` (ImageNet weights).
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        attention_module: optional identifier of the attention module to
            attach after each depthwise block (forwarded to
            `attach_attention_module`), or `None` to disable attention.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
    """

    if K.backend() != 'tensorflow':
        raise RuntimeError('Only TensorFlow backend is currently supported, '
                           'as other backends do not support '
                           'depthwise convolution.')

    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as ImageNet with `include_top` '
                         'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    if input_shape is None:
        default_size = 224
    else:
        if K.image_data_format() == 'channels_first':
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if K.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    x = _conv_block(img_input, 32, alpha, strides=(2, 2))
    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1, attention_module=attention_module)

    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier,
                              strides=(2, 2), block_id=2, attention_module=attention_module)
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3, attention_module=attention_module)

    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier,
                              strides=(2, 2), block_id=4, attention_module=attention_module)
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5, attention_module=attention_module)

    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
                              strides=(2, 2), block_id=6, attention_module=attention_module)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7, attention_module=attention_module)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8, attention_module=attention_module)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9, attention_module=attention_module)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10, attention_module=attention_module)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11, attention_module=attention_module)

    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier,
                              strides=(2, 2), block_id=12, attention_module=attention_module)
    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13, attention_module=attention_module)

    if include_top:
        if K.image_data_format() == 'channels_first':
            shape = (int(1024 * alpha), 1, 1)
        else:
            shape = (1, 1, int(1024 * alpha))

        x = GlobalAveragePooling2D()(x)
        x = Reshape(shape, name='reshape_n_1')(x)
        x = Dropout(dropout, name='dropout')(x)
        x = Conv2D(classes, (1, 1),
                   padding='same', name='conv_preds')(x)
        x = Activation('softmax', name='act_softmax')(x)
        x = Reshape((classes,), name='reshape_final')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='se_mobilenet_%0.2f_%s' % (alpha, rows))

    return model


def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
    """Adds an initial convolution layer (with batch normalization and relu6).

    # Arguments
        inputs: Input tensor of shape `(rows, cols, 3)`
            (with `channels_last` data format) or
            (3, rows, cols) (with `channels_first` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 32.
            E.g. `(224, 224, 3)` would be one valid value.
        filters: Integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution).
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                are used at each layer.
        kernel: An integer or tuple/list of 2 integers, specifying the
            width and height of the 2D convolution window.
            Can be a single integer to specify the same value for
            all spatial dimensions.
        strides: An integer or tuple/list of 2 integers,
            specifying the strides of the convolution along the width and height.
            Can be a single integer to specify the same value for
            all spatial dimensions.
            Specifying any stride value != 1 is incompatible with specifying
            any `dilation_rate` value != 1.

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(samples, filters, new_rows, new_cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, new_rows, new_cols, filters)` if data_format='channels_last'.
        `rows` and `cols` values might have changed due to stride.

    # Returns
        Output tensor of block.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    filters = int(filters * alpha)
    x = Conv2D(filters, kernel,
               padding='same',
               use_bias=False,
               strides=strides,
               name='conv1')(inputs)
    x = BatchNormalization(axis=channel_axis, name='conv1_bn')(x)
    return Activation(relu6, name='conv1_relu')(x)


def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha,
                          depth_multiplier=1, strides=(1, 1), block_id=1,
                          attention_module=None):
    """Adds a depthwise convolution block.

    A depthwise convolution block consists of a depthwise conv,
    batch normalization, relu6, pointwise convolution,
    batch normalization and relu6 activation.

    # Arguments
        inputs: Input tensor of shape `(rows, cols, channels)`
            (with `channels_last` data format) or
            (channels, rows, cols) (with `channels_first` data format).
        pointwise_conv_filters: Integer, the dimensionality of the output space
            (i.e. the number of output filters in the pointwise convolution).
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                are used at each layer.
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel.
            The total number of depthwise convolution output
            channels will be equal to `filters_in * depth_multiplier`.
        strides: An integer or tuple/list of 2 integers,
            specifying the strides of the convolution along the width and height.
            Can be a single integer to specify the same value for
            all spatial dimensions.
            Specifying any stride value != 1 is incompatible with specifying
            any `dilation_rate` value != 1.
        block_id: Integer, a unique identifier designating the block number.
        attention_module: optional identifier of the attention module to
            attach to the block output (forwarded to
            `attach_attention_module`), or `None` to disable attention.

    # Input shape
        4D tensor with shape:
        `(batch, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(batch, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(batch, filters, new_rows, new_cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(batch, new_rows, new_cols, filters)` if data_format='channels_last'.
        `rows` and `cols` values might have changed due to stride.

    # Returns
        Output tensor of block.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    pointwise_conv_filters = int(pointwise_conv_filters * alpha)

    # Depthwise 3x3 convolution: per-channel spatial filtering.
    x = DepthwiseConv2D((3, 3),
                        padding='same',
                        depth_multiplier=depth_multiplier,
                        strides=strides,
                        use_bias=False,
                        name='conv_dw_%d' % block_id)(inputs)
    x = BatchNormalization(axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x)
    x = Activation(relu6, name='conv_dw_%d_relu' % block_id)(x)

    # Pointwise 1x1 convolution: mixes channels and sets the output width.
    x = Conv2D(pointwise_conv_filters, (1, 1),
               padding='same',
               use_bias=False,
               strides=(1, 1),
               name='conv_pw_%d' % block_id)(x)
    x = BatchNormalization(axis=channel_axis, name='conv_pw_%d_bn' % block_id)(x)
    x = Activation(relu6, name='conv_pw_%d_relu' % block_id)(x)

    # Optionally attach an attention module (e.g. SE or CBAM) to the block output.
    if attention_module is not None:
        x = attach_attention_module(x, attention_module)

    return x
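

# -----------------------------------------------------------------------------
# Minimal usage sketch. Assumes a working Keras + TensorFlow install and that
# the `nets.attention_module.se_cbam` package imported above is available; the
# alpha/classes values below are arbitrary example settings, not defaults.
# -----------------------------------------------------------------------------
if __name__ == '__main__':
    # Build a width-0.50 MobileNet v1 for a hypothetical 10-class task, with
    # randomly initialized weights and no attention module.
    model = MobileNet(input_shape=(224, 224, 3),
                      alpha=0.5,
                      weights=None,
                      classes=10,
                      attention_module=None)
    model.summary()

    # To reload a saved model later, register the custom objects defined in
    # this module, e.g.:
    #     from keras.models import load_model
    #     model = load_model('mobilenet.h5',
    #                        custom_objects={'relu6': relu6,
    #                                        'DepthwiseConv2D': DepthwiseConv2D})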