ml-4m/cfgs/default/4m/models/main/4m-b_mod7_500b.yaml

# Config for DDP
# Arch: SwiGLU No Bias
# Modalities: Mix of rgb2all and all2all, with alphas=0.5
# To be run on 64 GPUs for a total batch size of 8192
run_name: auto
# Input & output
num_input_tokens: 128
num_target_tokens: 128
loss_type: mod
# Architecture
model: fm_base_12e_12d_swiglu_nobias
patch_size: 16
input_size: 224
dtype: bfloat16
tokenizer_path: "fourm/utils/tokenizer/trained/text_tokenizer_4m_wordpiece_30k.json"
# Train
epochs: -1
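# with epochs set to -1, the run length is presumably governed by total_tokens below rather than an epoch count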
total_tokens: 500 # in billions
opt: adamw
blr: 0.0001 # base learning rate (1e-4); effective lr = blr * batch size / 256
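# e.g. at the full batch size of 8192 (64 GPUs x 128), and assuming the scaling uses the total batch size: lr = 1e-4 * 8192 / 256 = 3.2e-3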
min_blr: 0.
warmup_epochs: -1
warmup_tokens: 10 # in billions
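# i.e. warmup covers 10B of the 500B total tokens, a 2% warmup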
batch_size: 128 # 128 x 64 = 8192
# Data
data_config: "cfgs/default/4m/data/cc12m/main/mix_mod7_all2all_rgb2all_a0.5.yaml"
s3_data_endpoint: "/path/to/endpoint" # Change me
eval_freq: 1
fixed_eval: True
epoch_size: 10_000_000 # Number of samples per "epoch"
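# at the full batch size of 8192, one "epoch" is roughly 10_000_000 / 8192 ≈ 1221 optimizer steps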
# Saving
save_ckpt_freq: 1
output_dir: 'output/auto'
# Wandb
log_wandb: False # Set to True to log to Weights & Biases
wandb_project: '4m-train'
wandb_entity: null # Change if needed
wandb_run_name: auto