# Config for DDP

# Arch: SwiGLU No Bias
# Modalities: Mix of rgb2all and all2all, with alphas=0.5
# To be run on 64 GPUs for batch size = 8192
run_name: auto

# Input & output
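# Per-sample token budgets (assumption, not stated in this file: input and
# target tokens are sampled from the mixed modalities up to these caps,
# as in the 4M pretraining setup).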
num_input_tokens: 128
num_target_tokens: 128
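# Assumption: "mod" averages the loss per modality rather than per token.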
loss_type: mod

# Architecture
model: fm_base_12e_12d_swiglu_nobias
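# The model name presumably encodes: base size, 12 encoder blocks, 12 decoder
# blocks, SwiGLU MLPs, no biases (matching the "Arch" note in the header).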
patch_size: 16
input_size: 224
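# 224 / 16 = 14, so each image corresponds to a 14x14 = 196-patch grid.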
dtype: bfloat16
tokenizer_path: "fourm/utils/tokenizer/trained/text_tokenizer_4m_wordpiece_30k.json"

# Train
epochs: -1
total_tokens: 500 # in billions
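# With epochs set to -1 above, the run length is presumably governed by this
# 500B-token budget rather than by an epoch count.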
opt: adamw
blr: 0.0001 # this is base_lr = 1e-4, lr = base_lr * batch_size / 256
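# For the 8192 global batch size used here: lr = 1e-4 * 8192 / 256 = 3.2e-3.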
min_blr: 0.
warmup_epochs: -1
warmup_tokens: 10 # in billions
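# 10B warmup tokens out of the 500B total, i.e. the first 2% of training.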
batch_size: 128 # per-GPU batch size; 128 x 64 GPUs = 8192 global

# Data
data_config: "cfgs/default/4m/data/cc12m/main/mix_mod7_all2all_rgb2all_a0.5.yaml"
s3_data_endpoint: "/path/to/endpoint" # Change me
eval_freq: 1
fixed_eval: True
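# At a global batch size of 8192, one such "epoch" is 10_000_000 / 8192 ≈ 1221
# optimizer steps (assuming one step per batch).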
epoch_size: 10_000_000 # Number of samples per "epoch"

# Saving
save_ckpt_freq: 1
output_dir: 'output/auto'

# Wandb
log_wandb: False # Set to True to log to Weights & Biases
wandb_project: '4m-train'
wandb_entity: null # Change if needed
wandb_run_name: auto
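# Example launch (illustrative sketch, not from this file): assumes the repo's
# run_training_4m.py entry point and that the 64 GPUs come from 8 nodes with
# 8 GPUs each; replace <path/to/this_config>.yaml with this file's actual path.
#   OMP_NUM_THREADS=1 torchrun --nnodes=8 --nproc_per_node=8 run_training_4m.py \
#     --config <path/to/this_config>.yaml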