// Training hyperparameters and global training-progress state.
ENTROPY_BETA = 0.005  // factor scaling the entropy term that boosts exploration
LR_A = 0.00005  // learning rate for the actor
LR_C = 0.0001  // learning rate for the critic
// Starts empty. Presumably collects a running reward value per finished
// episode -- TODO confirm against the code that appends to it.
GLOBAL_RUNNING_R = []
// Global episode counter: starts at 0 and increases during training; training
// stops once it is >= MAX_GLOBAL_EP (not defined in this section).
GLOBAL_EP = 0

//////////////////// Asynchronous Advantage Actor Critic (A3C) ////////////////////