I want to use an exponential moving average (EMA) of the model parameters while training on my data. The code looks like this:
class EMA():
    """Keep an exponential moving average (EMA) of a model's parameters.

    Intended call order:

    1. ``register()`` once, after the model's parameters are initialized.
    2. ``update()`` after every ``trainer.step()``.
    3. ``apply_shadow()`` before evaluation, ``restore()`` afterwards.

    ``model`` must expose ``collect_params()`` returning a mapping of
    name -> parameter, where each parameter provides ``list_data()`` and
    ``set_data()`` (the Gluon ``Parameter`` interface).

    Multi-device note: values are read from the first replica returned by
    ``list_data()`` and written back with ``set_data()``, so the class does
    not depend on the current default context.
    NOTE(review): this assumes every replica holds the same values after
    ``trainer.step()`` -- confirm for your kvstore configuration.
    """

    def __init__(self, model, decay):
        """Store the model and the decay factor.

        Args:
            model: Object exposing ``collect_params()``.
            decay: Weight given to the previous average in ``update()``;
                the current parameter value gets ``1 - decay``.
        """
        self.model = model
        self.decay = decay
        self.shadow = {}  # name -> running average of the parameter
        self.backup = {}  # name -> raw value saved by apply_shadow()

    @staticmethod
    def _first_replica(param):
        """Return the parameter's array on its first context."""
        # ``param.data()`` without a context is ambiguous when the parameter
        # lives on several devices; ``list_data()`` is not.
        return param.list_data()[0]

    def register(self):
        """Seed the running averages with the current parameter values."""
        for name, param in self.model.collect_params().items():
            # Copy so that later parameter updates do not alias the shadow.
            self.shadow[name] = self._first_replica(param).copy()

    def update(self):
        """Fold the current parameter values into the running averages.

        Call after ``trainer.step()``.
        """
        for name, param in self.model.collect_params().items():
            assert name in self.shadow, "call register() before update()"
            current = self._first_replica(param)
            # The arithmetic yields a fresh array, so no extra copy is needed.
            self.shadow[name] = (
                (1.0 - self.decay) * current + self.decay * self.shadow[name]
            )

    def apply_shadow(self):
        """Swap the averaged values into the model, saving the raw ones.

        Call before evaluation; undo with ``restore()``.
        """
        for name, param in self.model.collect_params().items():
            assert name in self.shadow, "call register() before apply_shadow()"
            # Copy first: set_data() may write into the array we just read.
            self.backup[name] = self._first_replica(param).copy()
            param.set_data(self.shadow[name])

    def restore(self):
        """Put back the raw values saved by ``apply_shadow()``."""
        for name, param in self.model.collect_params().items():
            assert name in self.backup, "call apply_shadow() before restore()"
            param.set_data(self.backup[name])
        self.backup = {}
How can I modify this code so that it runs on multiple GPUs?