-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpostprocessing.py
125 lines (112 loc) · 4.48 KB
/
postprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from typing import Union
import tensorflow as tf
class ExponentialMovingAverage(tf.keras.layers.Layer):
    """Computes an exponential moving average of a sequential input.

    The smoothing coefficient ``w`` is held as a layer weight (one scalar, or
    one value per channel) and the recursion
    ``s[t] = w * x[t] + (1 - w) * s[t - 1]`` is unrolled along the time axis
    with ``tf.scan``.
    """

    def __init__(
            self,
            coeff_init: Union[float, tf.Tensor],
            per_channel: bool = False,
            trainable: bool = False,
            name: str = 'EMA'):
        """Initializes the ExponentialMovingAverage.

        Args:
          coeff_init: the value of the initial coeff.
          per_channel: whether the smoothing should be different per channel.
          trainable: whether the smoothing should be trained or not.
          name: name of the layer; defaults to 'EMA'.
        """
        # Hand the flag to Keras through the public constructor argument
        # instead of overwriting the private `_trainable` attribute that backs
        # the `Layer.trainable` property.
        super().__init__(name=name, trainable=trainable)
        self._coeff_init = coeff_init
        self._per_channel = per_channel
        self._learn_coeff = trainable

    def build(self, input_shape):
        """Creates the smoothing coefficient from the last input dimension."""
        num_channels = input_shape[-1]
        # A single shared coefficient broadcasts over channels; the
        # per-channel variant stores one coefficient for each of them.
        coeff_shape = (num_channels,) if self._per_channel else (1,)
        self._weights = self.add_weight(
            name='smooth',
            shape=coeff_shape,
            initializer=tf.keras.initializers.Constant(self._coeff_init),
            trainable=self._learn_coeff)

    def call(self, inputs: tf.Tensor, initial_state: tf.Tensor):
        """Smooths `inputs` along the time axis.

        Args:
          inputs: tensor of shape [batch, seq_length, num_filters].
          initial_state: accumulator value used before the first time step;
            it must be compatible with a single time slice of `inputs`.

        Returns:
          The smoothed sequence, laid out like `inputs`.
        """
        # Keep the coefficient inside [0, 1] so every update is a convex
        # combination of the new sample and the running average.
        w = tf.clip_by_value(
            self._weights, clip_value_min=0.0, clip_value_max=1.0)

        def _step(average, sample):
            return w * sample + (1.0 - w) * average

        # tf.scan iterates over the leading axis, so move time to the front
        # and restore the batch-major layout afterwards.
        time_major = tf.transpose(inputs, (1, 0, 2))
        smoothed = tf.scan(_step, time_major, initializer=initial_state)
        return tf.transpose(smoothed, (1, 0, 2))
class PCENLayer(tf.keras.layers.Layer):
    """Per-Channel Energy Normalization.

    This applies a fixed or learnable normalization by an exponential moving
    average smoother, and a compression.
    See https://arxiv.org/abs/1607.05666 for more details.

    With ``M`` the smoothed input, the layer computes
    ``(x / (floor + M)**alpha + delta)**(1 / root) - delta**(1 / root)``.
    """

    def __init__(self,
                 alpha: float = 0.96,
                 smooth_coef: float = 0.04,
                 delta: float = 2.0,
                 root: float = 2.0,
                 floor: float = 1e-6,
                 trainable: bool = False,
                 learn_smooth_coef: bool = False,
                 per_channel_smooth_coef: bool = False,
                 name='PCEN'):
        """PCEN constructor.

        Args:
          alpha: float, exponent of EMA smoother
          smooth_coef: float, smoothing coefficient of EMA
          delta: float, bias added before compression
          root: float, one over exponent applied for compression (r in the
            paper)
          floor: float, offset added to EMA smoother
          trainable: bool, False means fixed_pcen, True is trainable_pcen
          learn_smooth_coef: bool, True means we also learn the smoothing
            coefficient
          per_channel_smooth_coef: bool, True means each channel has its own
            smooth coefficient
          name: str, name of the layer
        """
        # The Keras-level flag must stay on whenever *any* parameter is
        # learned. Overwriting the private `_trainable` attribute with
        # `trainable` alone made the layer report no trainable weights when
        # only the smoothing coefficient was meant to be learned.
        super().__init__(
            name=name, trainable=bool(trainable or learn_smooth_coef))
        self._alpha_init = alpha
        self._delta_init = delta
        self._root_init = root
        self._smooth_coef = smooth_coef
        self._floor = floor
        # Controls only alpha/delta/root; kept under a name that does not
        # collide with Keras internals.
        self._learn_params = trainable
        self._learn_smooth_coef = learn_smooth_coef
        self._per_channel_smooth_coef = per_channel_smooth_coef

    def _add_channel_weight(self, name, init_value, num_channels):
        """Adds a per-channel weight filled with the constant `init_value`."""
        return self.add_weight(
            name=name,
            shape=[num_channels],
            initializer=tf.keras.initializers.Constant(init_value),
            trainable=self._learn_params)

    def build(self, input_shape):
        """Creates the PCEN parameters and the smoother for `input_shape`."""
        num_channels = input_shape[-1]
        self.alpha = self._add_channel_weight(
            'alpha', self._alpha_init, num_channels)
        self.delta = self._add_channel_weight(
            'delta', self._delta_init, num_channels)
        self.root = self._add_channel_weight(
            'root', self._root_init, num_channels)
        if self._learn_smooth_coef:
            self.ema = ExponentialMovingAverage(
                coeff_init=self._smooth_coef,
                per_channel=self._per_channel_smooth_coef,
                trainable=True)
        else:
            # Fixed smoother expressed as a frozen linear RNN: scaled identity
            # kernels with no bias and no activation give
            # h[t] = s * x[t] + (1 - s) * h[t - 1] independently per channel.
            self.ema = tf.keras.layers.SimpleRNN(
                units=num_channels,
                activation=None,
                use_bias=False,
                kernel_initializer=tf.keras.initializers.Identity(
                    gain=self._smooth_coef),
                recurrent_initializer=tf.keras.initializers.Identity(
                    gain=1. - self._smooth_coef),
                return_sequences=True,
                trainable=False)

    def call(self, inputs):
        """Applies PCEN to `inputs`.

        Args:
          inputs: tensor whose axis 1 is time and whose last axis is the
            channel axis.

        Returns:
          The normalized and compressed tensor.
        """
        # Constrain the exponents: alpha is capped at 1 and root floored at 1.
        alpha = tf.math.minimum(self.alpha, 1.0)
        root = tf.math.maximum(self.root, 1.0)
        # The smoother is seeded with the first time step of the input.
        first_frame = tf.gather(inputs, 0, axis=1)
        ema_smoother = self.ema(inputs, initial_state=first_frame)
        one_over_root = 1. / root
        normalized = inputs / (self._floor + ema_smoother)**alpha
        # Subtracting delta**(1/root) maps a zero normalized input to zero.
        output = ((normalized + self.delta)**one_over_root
                  - self.delta**one_over_root)
        return output