# D:/workplace/python
# -*- coding: utf-8 -*-
# @File :mobilenetV1.py
# @Author:Guido LuXiaohao
# @Date :2020/4/8
# @Software:PyCharm
"""MobileNet v1 models for Keras.

This is a revised implementation from Somshubra Majumdar's SENet repo:
(https://github.com/titu1994/keras-squeeze-excite-network)

# Reference
- [MobileNets: Efficient Convolutional Neural Networks for
   Mobile Vision Applications](https://arxiv.org/pdf/1704.04861.pdf)
"""
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division

import warnings

from keras.models import Model
from keras.layers import Input
from keras.layers import Activation
from keras.layers import Dropout
from keras.layers import Reshape
from keras.layers import BatchNormalization
from keras.layers import GlobalAveragePooling2D
from keras.layers import GlobalMaxPooling2D
from keras.layers import Conv2D
from keras import initializers
from keras import regularizers
from keras import constraints
from keras.utils import conv_utils
from keras.utils.data_utils import get_file
from keras.engine.topology import get_source_inputs
from keras.engine import InputSpec
from keras.applications import imagenet_utils
from keras_applications.imagenet_utils import _obtain_input_shape
from keras.applications.imagenet_utils import decode_predictions
from keras import backend as K

from nets.attention_module.se_cbam import attach_attention_module


def relu6(x):
    return K.relu(x, max_value=6)


def preprocess_input(x):
    """Preprocesses a numpy array encoding a batch of images.

    # Arguments
        x: a 4D numpy array consisting of RGB values within [0, 255].

    # Returns
        Preprocessed array.
    """
    return imagenet_utils.preprocess_input(x, mode='tf')
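
# Minimal usage sketch (commented out so the module stays import-only; assumes
# numpy is installed, and the array values are purely illustrative):
#   import numpy as np
#   batch = np.random.randint(0, 256, size=(1, 224, 224, 3)).astype('float32')
#   batch = preprocess_input(batch)  # mode='tf' rescales [0, 255] to [-1, 1]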


class DepthwiseConv2D(Conv2D):
    """Depthwise separable 2D convolution.

    Depthwise separable convolutions consist of performing just the first
    step of a depthwise spatial convolution (which acts on each input
    channel separately). The `depth_multiplier` argument controls how many
    output channels are generated per input channel in the depthwise step.

    # Arguments
        kernel_size: An integer or tuple/list of 2 integers, specifying the
            width and height of the 2D convolution window.
            Can be a single integer to specify the same value for
            all spatial dimensions.
        strides: An integer or tuple/list of 2 integers,
            specifying the strides of the convolution along the width and height.
            Can be a single integer to specify the same value for
            all spatial dimensions.
            Specifying any stride value != 1 is incompatible with specifying
            any `dilation_rate` value != 1.
        padding: one of `'valid'` or `'same'` (case-insensitive).
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel.
            The total number of depthwise convolution output
            channels will be equal to `filters_in * depth_multiplier`.
        data_format: A string,
            one of `channels_last` (default) or `channels_first`.
            The ordering of the dimensions in the inputs.
            `channels_last` corresponds to inputs with shape
            `(batch, height, width, channels)` while `channels_first`
            corresponds to inputs with shape
            `(batch, channels, height, width)`.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be 'channels_last'.
        activation: Activation function to use
            (see [activations](../activations.md)).
            If you don't specify anything, no activation is applied
            (ie. 'linear' activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        depthwise_initializer: Initializer for the depthwise kernel matrix
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        depthwise_regularizer: Regularizer function applied to
            the depthwise kernel matrix
            (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer function applied to the bias vector
            (see [regularizer](../regularizers.md)).
        activity_regularizer: Regularizer function applied to
            the output of the layer (its 'activation')
            (see [regularizer](../regularizers.md)).
        depthwise_constraint: Constraint function applied to
            the depthwise kernel matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint function applied to the bias vector
            (see [constraints](../constraints.md)).

    # Input shape
        4D tensor with shape:
        `[batch, channels, rows, cols]` if data_format='channels_first'
        or 4D tensor with shape:
        `[batch, rows, cols, channels]` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `[batch, filters, new_rows, new_cols]` if data_format='channels_first'
        or 4D tensor with shape:
        `[batch, new_rows, new_cols, filters]` if data_format='channels_last'.
        `rows` and `cols` values might have changed due to padding.
    """

    def __init__(self,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 depth_multiplier=1,
                 data_format=None,
                 activation=None,
                 use_bias=True,
                 depthwise_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 depthwise_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 depthwise_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        super(DepthwiseConv2D, self).__init__(
            filters=None,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            activation=activation,
            use_bias=use_bias,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            bias_constraint=bias_constraint,
            **kwargs)
        self.depth_multiplier = depth_multiplier
        self.depthwise_initializer = initializers.get(depthwise_initializer)
        self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
        self.depthwise_constraint = constraints.get(depthwise_constraint)
        self.bias_initializer = initializers.get(bias_initializer)

    def build(self, input_shape):
        if len(input_shape) < 4:
            raise ValueError('Inputs to `DepthwiseConv2D` should have rank 4. '
                             'Received input shape: ' + str(input_shape))
        if self.data_format == 'channels_first':
            channel_axis = 1
        else:
            channel_axis = 3
        if input_shape[channel_axis] is None:
            raise ValueError('The channel dimension of the inputs to '
                             '`DepthwiseConv2D` '
                             'should be defined. Found `None`.')
        input_dim = int(input_shape[channel_axis])
        depthwise_kernel_shape = (self.kernel_size[0],
                                  self.kernel_size[1],
                                  input_dim,
                                  self.depth_multiplier)

        self.depthwise_kernel = self.add_weight(
            shape=depthwise_kernel_shape,
            initializer=self.depthwise_initializer,
            name='depthwise_kernel',
            regularizer=self.depthwise_regularizer,
            constraint=self.depthwise_constraint)

        if self.use_bias:
            self.bias = self.add_weight(shape=(input_dim * self.depth_multiplier,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        else:
            self.bias = None
        # Set input spec.
        self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim})
        self.built = True

    def call(self, inputs, training=None):
        outputs = K.depthwise_conv2d(
            inputs,
            self.depthwise_kernel,
            strides=self.strides,
            padding=self.padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format)

        # Check the flag, not the weight itself: truth-testing a tensor-like
        # bias variable (`if self.bias:`) is ambiguous on the TF backend.
        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)

        return outputs

    def compute_output_shape(self, input_shape):
        if self.data_format == 'channels_first':
            rows = input_shape[2]
            cols = input_shape[3]
            out_filters = input_shape[1] * self.depth_multiplier
        elif self.data_format == 'channels_last':
            rows = input_shape[1]
            cols = input_shape[2]
            out_filters = input_shape[3] * self.depth_multiplier

        rows = conv_utils.conv_output_length(rows, self.kernel_size[0],
                                             self.padding,
                                             self.strides[0])
        cols = conv_utils.conv_output_length(cols, self.kernel_size[1],
                                             self.padding,
                                             self.strides[1])

        if self.data_format == 'channels_first':
            return (input_shape[0], out_filters, rows, cols)
        elif self.data_format == 'channels_last':
            return (input_shape[0], rows, cols, out_filters)

    def get_config(self):
        config = super(DepthwiseConv2D, self).get_config()
        config.pop('filters')
        config.pop('kernel_initializer')
        config.pop('kernel_regularizer')
        config.pop('kernel_constraint')
        config['depth_multiplier'] = self.depth_multiplier
        config['depthwise_initializer'] = initializers.serialize(self.depthwise_initializer)
        config['depthwise_regularizer'] = regularizers.serialize(self.depthwise_regularizer)
        config['depthwise_constraint'] = constraints.serialize(self.depthwise_constraint)
        return config
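
# Standalone sketch of the layer above (commented out; shapes are illustrative):
#   inp = Input(shape=(32, 32, 16))
#   out = DepthwiseConv2D((3, 3), padding='same', depth_multiplier=2)(inp)
#   # `out` carries 16 * 2 = 32 channels: `depth_multiplier` filters per input
#   # channel, matching compute_output_shape() above.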


def MobileNet(input_shape=None,
              alpha=1.0,
              depth_multiplier=1,
              dropout=1e-3,
              include_top=True,
              weights=None,
              input_tensor=None,
              pooling=None,
              classes=1000,
              attention_module=None):
    """Instantiates the SE-MobileNet architecture.

    Note that only TensorFlow is supported for now,
    therefore it only works with the data format
    `image_data_format='channels_last'` in your Keras config
    at `~/.keras/keras.json`.

    To load a MobileNet model via `load_model`, import the custom
    objects `relu6` and `DepthwiseConv2D` and pass them to the
    `custom_objects` parameter.
    E.g.
        model = load_model('mobilenet.h5', custom_objects={
                           'relu6': mobilenet.relu6,
                           'DepthwiseConv2D': mobilenet.DepthwiseConv2D})

    # Arguments
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format)).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 32.
            E.g. `(200, 200, 3)` would be one valid value.
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                are used at each layer.
        depth_multiplier: depth multiplier for depthwise convolution
            (also called the resolution multiplier)
        dropout: dropout rate
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: `None` (random initialization) or
            `imagenet` (ImageNet weights)
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        attention_module: optional attention module to attach after each
            depthwise block (passed through to `attach_attention_module`).

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
    """
    if K.backend() != 'tensorflow':
        raise RuntimeError('Only TensorFlow backend is currently supported, '
                           'as other backends do not support '
                           'depthwise convolution.')

    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as ImageNet with `include_top` '
                         'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    if input_shape is None:
        default_size = 224
    else:
        if K.image_data_format() == 'channels_first':
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if K.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    x = _conv_block(img_input, 32, alpha, strides=(2, 2))
    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1,
                              attention_module=attention_module)

    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier,
                              strides=(2, 2), block_id=2,
                              attention_module=attention_module)
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3,
                              attention_module=attention_module)

    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier,
                              strides=(2, 2), block_id=4,
                              attention_module=attention_module)
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5,
                              attention_module=attention_module)

    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
                              strides=(2, 2), block_id=6,
                              attention_module=attention_module)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7,
                              attention_module=attention_module)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8,
                              attention_module=attention_module)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9,
                              attention_module=attention_module)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10,
                              attention_module=attention_module)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11,
                              attention_module=attention_module)

    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier,
                              strides=(2, 2), block_id=12,
                              attention_module=attention_module)
    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13,
                              attention_module=attention_module)

    if include_top:
        if K.image_data_format() == 'channels_first':
            shape = (int(1024 * alpha), 1, 1)
        else:
            shape = (1, 1, int(1024 * alpha))

        x = GlobalAveragePooling2D()(x)
        x = Reshape(shape, name='reshape_n_1')(x)
        x = Dropout(dropout, name='dropout')(x)
        x = Conv2D(classes, (1, 1),
                   padding='same', name='conv_preds')(x)
        x = Activation('softmax', name='act_softmax')(x)
        x = Reshape((classes,), name='reshape_final')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='se_mobilenet_%0.2f_%s' % (alpha, rows))

    return model
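
# Save/load round trip mirroring the docstring above (a commented sketch; the
# file name 'mobilenet.h5' is illustrative):
#   from keras.models import load_model
#   model.save('mobilenet.h5')
#   model = load_model('mobilenet.h5',
#                      custom_objects={'relu6': relu6,
#                                      'DepthwiseConv2D': DepthwiseConv2D})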


def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
    """Adds an initial convolution layer (with batch normalization and relu6).

    # Arguments
        inputs: Input tensor of shape `(rows, cols, 3)`
            (with `channels_last` data format) or
            `(3, rows, cols)` (with `channels_first` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 32.
            E.g. `(224, 224, 3)` would be one valid value.
        filters: Integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution).
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                are used at each layer.
        kernel: An integer or tuple/list of 2 integers, specifying the
            width and height of the 2D convolution window.
            Can be a single integer to specify the same value for
            all spatial dimensions.
        strides: An integer or tuple/list of 2 integers,
            specifying the strides of the convolution along the width and height.
            Can be a single integer to specify the same value for
            all spatial dimensions.
            Specifying any stride value != 1 is incompatible with specifying
            any `dilation_rate` value != 1.

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(samples, filters, new_rows, new_cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, new_rows, new_cols, filters)` if data_format='channels_last'.
        `rows` and `cols` values might have changed due to stride.

    # Returns
        Output tensor of block.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    filters = int(filters * alpha)
    x = Conv2D(filters, kernel,
               padding='same',
               use_bias=False,
               strides=strides,
               name='conv1')(inputs)
    x = BatchNormalization(axis=channel_axis, name='conv1_bn')(x)
    return Activation(relu6, name='conv1_relu')(x)
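
# Width-multiplier arithmetic, worked out (values illustrative): with
# filters=32 and alpha=0.5 the block emits int(32 * 0.5) = 16 channels;
# with alpha=1.0 it keeps the paper's default of 32.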


def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha,
                          depth_multiplier=1, strides=(1, 1), block_id=1,
                          attention_module=None):
    """Adds a depthwise convolution block.

    A depthwise convolution block consists of a depthwise conv,
    batch normalization, relu6, pointwise convolution,
    batch normalization and relu6 activation.

    # Arguments
        inputs: Input tensor of shape `(rows, cols, channels)`
            (with `channels_last` data format) or
            `(channels, rows, cols)` (with `channels_first` data format).
        pointwise_conv_filters: Integer, the dimensionality of the output space
            (i.e. the number of output filters in the pointwise convolution).
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                are used at each layer.
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel.
            The total number of depthwise convolution output
            channels will be equal to `filters_in * depth_multiplier`.
        strides: An integer or tuple/list of 2 integers,
            specifying the strides of the convolution along the width and height.
            Can be a single integer to specify the same value for
            all spatial dimensions.
            Specifying any stride value != 1 is incompatible with specifying
            any `dilation_rate` value != 1.
        block_id: Integer, a unique identifier designating the block number.
        attention_module: optional attention module to attach after the
            pointwise stage (passed through to `attach_attention_module`).

    # Input shape
        4D tensor with shape:
        `(batch, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(batch, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(batch, filters, new_rows, new_cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(batch, new_rows, new_cols, filters)` if data_format='channels_last'.
        `rows` and `cols` values might have changed due to stride.

    # Returns
        Output tensor of block.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    pointwise_conv_filters = int(pointwise_conv_filters * alpha)

    x = DepthwiseConv2D((3, 3),
                        padding='same',
                        depth_multiplier=depth_multiplier,
                        strides=strides,
                        use_bias=False,
                        name='conv_dw_%d' % block_id)(inputs)
    x = BatchNormalization(axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x)
    x = Activation(relu6, name='conv_dw_%d_relu' % block_id)(x)

    x = Conv2D(pointwise_conv_filters, (1, 1),
               padding='same',
               use_bias=False,
               strides=(1, 1),
               name='conv_pw_%d' % block_id)(x)
    x = BatchNormalization(axis=channel_axis, name='conv_pw_%d_bn' % block_id)(x)
    x = Activation(relu6, name='conv_pw_%d_relu' % block_id)(x)

    # Optionally attach an attention module (e.g. SE/CBAM) to the block output.
    if attention_module is not None:
        x = attach_attention_module(x, attention_module)
    return x
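

# Quick construction check, a minimal sketch. `attention_module=None` builds
# plain MobileNet v1; the string keys accepted by attach_attention_module are
# defined in nets.attention_module.se_cbam and are not assumed here.
if __name__ == '__main__':
    model = MobileNet(input_shape=(224, 224, 3),
                      alpha=1.0,
                      include_top=True,
                      weights=None,
                      classes=1000,
                      attention_module=None)
    model.summary()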