|
algo: |
|
ddpg: |
|
params: |
|
target_update_tau: 0.01 |
|
policy: |
|
exploration: |
|
sigma: 0.3 |
|
theta: 0.15 |
|
deterministic_params: |
|
buffer_batch_size: 32 |
|
min_buffer_size: 10000 |
|
n_train_steps: 500 |
|
qf_lr: 0.0001 |
|
steps_per_epoch: 1 |
|
dqn: |
|
params: |
|
clip_gradient: 10 |
|
deterministic_eval: true |
|
double_q: false |
|
target_update_freq: 2 |
|
policy: |
|
exploration: |
|
decay_ratio: 0.5 |
|
max_epsilon: 1.0 |
|
min_epsilon: 0.05 |
|
general_params: |
|
discount: 0.99 |
|
package: garage |
|
policy: |
|
hidden_sizes: |
|
- 128 |
|
- 128 |
|
pretrained_policy: null |
|
ppo: |
|
params: |
|
center_adv: false |
|
tanhnormal: false |
|
pretrain: |
|
additional_config: null |
|
algo_to_pretrain: null |
|
params: |
|
episodes_per_batch: 10 |
|
loss: log_prob |
|
policy_lr: 0.01 |
|
pretrain_algo: rbc |
|
replay_buffer: |
|
buffer_size: 200000 |
|
rnd: |
|
batch_size: 64 |
|
bound_reward_weight: null |
|
bound_reward_weight_initial_ratio: 0.999999 |
|
bound_reward_weight_transient_epochs: 10 |
|
hidden_sizes: |
|
- 64 |
|
- 64 |
|
intrinsic_reward_weight: 0.01 |
|
n_train_steps: 32 |
|
output_dim: 128 |
|
predictor_lr: 0.001 |
|
standardize_extrinsic_reward: true |
|
standardize_intrinsic_reward: true |
|
sampler: |
|
n_workers: 16 |
|
type: ray |
|
train: |
|
batch_size: 50000 |
|
n_epochs: 100 |
|
steps_per_epoch: 32 |
|
type: ppo |
|
context: |
|
disable_logging: false |
|
experiment_name: null |
|
log_dir: |
|
from_keys: |
|
- microgrid.config.scenario |
|
- microgrid.methods.set_forecaster.forecaster |
|
- microgrid.methods.set_module_attrs.battery_transition_model |
|
- context.seed |
|
- env.domain_randomization.noise_std |
|
- algo.ppo.tanhnormal |
|
- algo.rnd.intrinsic_reward_weight |
|
parent: /home/ahalev/data/GridRL/paper_experiments |
|
use_existing_dir: false |
|
seed: 42 |
|
snapshot_gap: 10 |
|
verbose: 0 |
|
wandb: |
|
api_key_file: ../../local/wandb_api_key.txt |
|
group: null |
|
log_density: 1 |
|
plot_baseline: |
|
- mpc |
|
- rbc |
|
username: ahalev |
|
env: |
|
cls: DiscreteMicrogridEnv |
|
domain_randomization: |
|
noise_std: 0.01 |
|
relative_noise: true |
|
forced_genset: null |
|
net_load: |
|
slack_module: grid |
|
use: true |
|
observation_keys: |
|
- soc |
|
- net_load |
|
- import_price_current |
|
- import_price_forecast_0 |
|
- import_price_forecast_1 |
|
- import_price_forecast_2 |
|
- import_price_forecast_3 |
|
- import_price_forecast_4 |
|
microgrid: |
|
attributes: |
|
reward_shaping_func: !BaselineShaper |
|
baseline_module: false |
|
module: |
|
- grid |
|
- 0 |
|
config: |
|
scenario: 0 |
|
methods: |
|
set_forecaster: |
|
forecast_horizon: 23 |
|
forecaster: 0.0 |
|
forecaster_increase_uncertainty: true |
|
forecaster_relative_noise: true |
|
set_module_attrs: |
|
battery_transition_model: null |
|
normalized_action_bounds: |
|
- 0.0 |
|
- 1.0 |
|
trajectory: |
|
evaluate: |
|
final_step: -1 |
|
initial_step: 5840 |
|
trajectory_func: null |
|
train: |
|
final_step: 5840 |
|
initial_step: 0 |
|
trajectory_func: !FixedLengthStochasticTrajectory |
|
trajectory_length: 720 |
|
verbose: 1 |
|
|