@@ -152,11 +152,12 @@ testmode!(m::AlphaDropout, mode=true) =
   (m.active = isnothing(_tidy_active(mode)) ? nothing : !mode; m)
 
 """
-    LayerNorm(size..., λ=identity; affine=true, ϵ=1f-5)
+    LayerNorm(size..., λ=identity; affine=true, eps=1f-5)
 
 A [normalisation layer](https://arxiv.org/abs/1607.06450) designed to be
 used with recurrent hidden states.
 The argument `size` should be an integer or a tuple of integers.
+
 In the forward pass, the layer normalises the mean and standard
 deviation of the input, then applies the elementwise activation `λ`.
 The input is normalised along the first `length(size)` dimensions
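The hunk above only touches the docstring: the advertised keyword changes from Greek `ϵ` to ASCII `eps`. A minimal usage sketch on this branch (the size `5` and value `1f-4` are arbitrary):

using Flux

ln = LayerNorm(5; eps=1f-4)   # new ASCII spelling
ln2 = LayerNorm(5; ϵ=1f-4)    # old Greek spelling still works, but warns (see the constructor change below)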
@@ -190,9 +191,10 @@ struct LayerNorm{F,D,T,N}
   affine::Bool
 end
 
-function LayerNorm(size::Tuple{Vararg{Int}}, λ=identity; affine::Bool=true, ϵ::Real=1f-5)
+function LayerNorm(size::Tuple{Vararg{Int}}, λ=identity; affine::Bool=true, eps::Real=1f-5, ϵ=nothing)
+  ε = _greek_ascii_depwarn(ϵ => eps, :LayerNorm, "ϵ" => "eps")
   diag = affine ? Scale(size..., λ) : λ!=identity ? Base.Fix1(broadcast, λ) : identity
-  return LayerNorm(λ, diag, ϵ, size, affine)
+  return LayerNorm(λ, diag, ε, size, affine)
 end
 LayerNorm(size::Integer...; kw...) = LayerNorm(Int.(size); kw...)
 LayerNorm(size_act...; kw...) = LayerNorm(Int.(size_act[1:end-1]), size_act[end]; kw...)
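Both keywords are reconciled by `_greek_ascii_depwarn`, which is defined elsewhere in this PR, not in these hunks. A hypothetical sketch of the behaviour the call sites rely on (name and argument order taken from the calls above; the body is assumed):

# Assumed sketch, not the actual Flux definition: prefer the ASCII
# keyword's value; if the deprecated Greek keyword was passed, warn
# and honour it instead.
function _greek_ascii_depwarn(values::Pair, layer::Symbol, names::Pair)
  greek, ascii = values.first, values.second   # e.g. nothing => 1f-5
  isnothing(greek) && return ascii             # Greek keyword not supplied
  Base.depwarn("$layer(; $(names.first) = ...) is deprecated, please use $layer(; $(names.second) = ...) instead", layer)
  return greek
end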
@@ -269,7 +271,7 @@ ChainRulesCore.@non_differentiable _track_stats!(::Any...)
     BatchNorm(channels::Integer, λ=identity;
               initβ=zeros32, initγ=ones32,
               affine=true, track_stats=true, active=nothing,
-              ϵ=1f-5, momentum=0.1f0)
+              eps=1f-5, momentum=0.1f0)
 
 [Batch Normalization](https://arxiv.org/abs/1502.03167) layer.
 `channels` should be the size of the channel dimension in your data (see below).
@@ -321,16 +323,18 @@ end
 
 function BatchNorm(chs::Int, λ=identity;
                    initβ=zeros32, initγ=ones32,
-                   affine=true, track_stats=true, active::Union{Bool,Nothing}=nothing,
-                   ϵ=1f-5, momentum=0.1f0)
+                   affine::Bool=true, track_stats::Bool=true, active::Union{Bool,Nothing}=nothing,
+                   eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)
+
+  ε = _greek_ascii_depwarn(ϵ => eps, :BatchNorm, "ϵ" => "eps")
 
   β = affine ? initβ(chs) : nothing
   γ = affine ? initγ(chs) : nothing
   μ = track_stats ? zeros32(chs) : nothing
   σ² = track_stats ? ones32(chs) : nothing
 
   return BatchNorm(λ, β, γ,
-            μ, σ², ϵ, momentum,
+            μ, σ², ε, momentum,
             affine, track_stats,
             active, chs)
 end
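`BatchNorm` gets the same treatment, plus `::Bool`/`::Real` annotations on the other keywords. Only the keyword is renamed; the positional `BatchNorm(λ, β, γ, ...)` call suggests the struct field keeps its Greek name. A hedged check under that assumption:

using Flux

bn = BatchNorm(3; eps=1f-4, momentum=0.9f0)
bn.ϵ == 1f-4   # assumed: field name unchanged, only the keyword differs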
@@ -361,7 +365,7 @@
     InstanceNorm(channels::Integer, λ=identity;
                  initβ=zeros32, initγ=ones32,
                  affine=false, track_stats=false,
-                 ϵ=1f-5, momentum=0.1f0)
+                 eps=1f-5, momentum=0.1f0)
 
 [Instance Normalization](https://arxiv.org/abs/1607.08022) layer.
 `channels` should be the size of the channel dimension in your data (see below).
@@ -411,16 +415,18 @@ end
 
 function InstanceNorm(chs::Int, λ=identity;
                       initβ=zeros32, initγ=ones32,
-                      affine=false, track_stats=false, active::Union{Bool,Nothing}=nothing,
-                      ϵ=1f-5, momentum=0.1f0)
+                      affine::Bool=false, track_stats::Bool=false, active::Union{Bool,Nothing}=nothing,
+                      eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)
+
+  ε = _greek_ascii_depwarn(ϵ => eps, :InstanceNorm, "ϵ" => "eps")
 
   β = affine ? initβ(chs) : nothing
   γ = affine ? initγ(chs) : nothing
   μ = track_stats ? zeros32(chs) : nothing
   σ² = track_stats ? ones32(chs) : nothing
 
   return InstanceNorm(λ, β, γ,
-            μ, σ², ϵ, momentum,
+            μ, σ², ε, momentum,
             affine, track_stats,
             active, chs)
 end
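`InstanceNorm` keeps its different defaults (`affine=false`, `track_stats=false`) while gaining the same `eps`/`ϵ` handling. For example (the field name `β` is assumed from the positional constructor call):

using Flux

inorm = InstanceNorm(3; eps=1f-4)
isnothing(inorm.β)   # true: affine=false by default, so no bias parameter is created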
@@ -450,7 +456,7 @@
     GroupNorm(channels::Integer, G::Integer, λ=identity;
               initβ=zeros32, initγ=ones32,
               affine=true, track_stats=false,
-              ϵ=1f-5, momentum=0.1f0)
+              eps=1f-5, momentum=0.1f0)
 
 [Group Normalization](https://arxiv.org/abs/1803.08494) layer.
 
@@ -508,12 +514,13 @@ trainable(gn::GroupNorm) = hasaffine(gn) ? (β = gn.β, γ = gn.γ) : (;)
 
 function GroupNorm(chs::Int, G::Int, λ=identity;
                    initβ=zeros32, initγ=ones32,
-                   affine=true, track_stats=false, active::Union{Bool,Nothing}=nothing,
-                   ϵ=1f-5, momentum=0.1f0)
+                   affine::Bool=true, track_stats::Bool=false, active::Union{Bool,Nothing}=nothing,
+                   eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)
 
-  if track_stats
+  if track_stats
     Base.depwarn("`track_stats=true` will be removed from GroupNorm in Flux 0.14. The default value is `track_stats=false`, which will work as before.", :GroupNorm)
-  end
+  end
+  ε = _greek_ascii_depwarn(ϵ => eps, :GroupNorm, "ϵ" => "eps")
 
   chs % G == 0 || error("The number of groups ($(G)) must divide the number of channels ($chs)")
 
@@ -525,7 +532,7 @@
   return GroupNorm(G, λ,
             β, γ,
             μ, σ²,
-            ϵ, momentum,
+            ε, momentum,
             affine, track_stats,
             active, chs)
 end
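All four layers follow the same recipe, so the deprecation can be exercised uniformly. A test sketch (layer sizes are arbitrary; `@test_deprecated` from the Test stdlib only sees the warning when Julia runs with `--depwarn=yes`):

using Flux, Test

for (Layer, args) in ((LayerNorm, (4,)), (BatchNorm, (4,)),
                      (InstanceNorm, (4,)), (GroupNorm, (4, 2)))
  Layer(args...; eps=1f-4)                 # new keyword: silent
  @test_deprecated Layer(args...; ϵ=1f-4)  # deprecated keyword: warns
end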