Skip to content

Commit

Permalink
Merge branch 'pytorch:main' into add_miniPile
Browse files Browse the repository at this point in the history
  • Loading branch information
lessw2020 authored Feb 27, 2024
2 parents 65ee4de + 5a1689f commit d1bf5d6
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 55 deletions.
4 changes: 2 additions & 2 deletions test/test_job_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ class TestJobConfig:
def test_command_line_args(self):
config = JobConfig()
config.parse_args([])
assert config.model.name == "llama"
assert config.training.steps == -1

def test_job_config_file(self):
config = JobConfig()
config.parse_args(["--job.config_file", "./train_configs/debug_model.toml"])
assert config.model.name == "llama"
assert config.training.steps == 10

def test_job_file_does_not_exist(self):
with pytest.raises(FileNotFoundError):
Expand Down
11 changes: 7 additions & 4 deletions torchtrain/config_manager.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
import argparse
Expand All @@ -17,16 +20,16 @@ class JobConfig:
Semantics:
- Default config is loaded from a toml file. If no toml file is provided,
then the default config is loaded from argparse defaults.
- If the toml file has missing keys, they are filled with argparse defaults.
"""

def parse_args(self, args_list: list = sys.argv[1:]):
args = JobConfig.init_args_from_command_line(args_list)
config_file = getattr(args, "job.config_file", None)
if config_file is None:
args_dict = self._args_to_two_level_dict(args)
else:
args_dict = self._args_to_two_level_dict(args)
if config_file is not None:
with open(config_file, "rb") as f:
args_dict = tomllib.load(f)
args_dict |= tomllib.load(f)
for k, v in args_dict.items():
class_type = type(k.title(), (), v)
setattr(self, k, class_type())
Expand Down
40 changes: 25 additions & 15 deletions torchtrain/datasets/alpaca.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class AlpacaDataset(IterableDataset):
seq_len (int): max sequence length
world_size (int): number of data parallel processes participating in training
rank (int): rank of the current data parallel process
infinite (bool): whether to loop infinitely over the dataset
Data input format:
{
Expand All @@ -43,38 +44,47 @@ def __init__(
seq_len: int = 2048,
world_size: int = 1,
rank: int = 0,
infinite: bool = False,
**kwargs
) -> None:
# TODO: This is a temporary solution for small datasets like Alpaca.
# For larger datasets we need to use a more scalable approach.
# Setting `streaming=True` works for large datasets, but it is slow.
ds = load_dataset("tatsu-lab/alpaca", split="train")
self.data_iterator = iter(split_dataset_by_node(ds, rank, world_size))
self._data = split_dataset_by_node(ds, rank, world_size)
self._tokenizer = tokenizer
self.seq_len = seq_len
self.infinite = infinite

def __iter__(self):
max_buffer_token_len = 1 + self.seq_len
all_tokens: List[int] = []

for sample in self.data_iterator:
sample_text = sample["text"]
sample_tokens = self._tokenizer.encode(sample_text, bos=True, eos=True)
all_tokens.extend(sample_tokens)
while True:
for sample in iter(self._data):
sample_text = sample["text"]
sample_tokens = self._tokenizer.encode(sample_text, bos=True, eos=True)
all_tokens.extend(sample_tokens)

while len(all_tokens) >= max_buffer_token_len:
x = torch.LongTensor(all_tokens[:max_buffer_token_len])
# batched_x = x.reshape(self.batch_size, -1)
# update tokens to the remaining tokens
all_tokens = all_tokens[max_buffer_token_len:]
input = x[:-1]
label = x[1:]
yield input, label
while len(all_tokens) >= max_buffer_token_len:
x = torch.LongTensor(all_tokens[:max_buffer_token_len])
# update tokens to the remaining tokens
all_tokens = all_tokens[max_buffer_token_len:]
input = x[:-1]
label = x[1:]
yield input, label
if not self.infinite:
break


def build_alpaca_data_loader(
tokenizer: TokenizerIf, batch_size: int, seq_len: int, world_size, rank
tokenizer: TokenizerIf,
batch_size: int,
seq_len: int,
world_size: int,
rank: int,
infinite: bool = True,
):
alpaca_ds = AlpacaDataset(tokenizer, seq_len, world_size, rank)
alpaca_ds = AlpacaDataset(tokenizer, seq_len, world_size, rank, infinite)

return DataLoader(alpaca_ds, batch_size=batch_size)
51 changes: 26 additions & 25 deletions torchtrain/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,20 @@

from torchtrain.logging_utils import rank0_log

_gb_in_bytes = 1024 * 1024 * 1024
_mb_in_bytes = 1024 * 1024
# Note: 1 GiB (gibibyte) is 1024**3 bytes, whereas 1 GB (gigabyte) is 1000**3 bytes.
_gib_in_bytes = 1024 * 1024 * 1024
_mib_in_bytes = 1024 * 1024


def format_to_gb(item, precision=4):
"""quick function to format numbers to gigabyte and round to (default) 4 digit precision"""
metric_num = item / _gb_in_bytes
def _format_to_gib(item, precision=4):
"""quick function to format numbers to gibibyte and round to (default) 4 digit precision"""
metric_num = item / _gib_in_bytes
metric_num = round(metric_num, ndigits=precision)
return metric_num


def convert_to_gpu_pct(value, total_gpu_memory):
return round(100 * (value / total_gpu_memory), 2)
def _convert_to_gpu_pct(value, total_gpu_memory, precision=4):
return round(100 * (value / total_gpu_memory), precision)


# named tuple for passing memory stats (as % of device capacity) for Tensorboard logging
Expand Down Expand Up @@ -58,7 +59,7 @@ def __init__(self, device: str = "cuda:0"):
self.device_capacity = torch.cuda.get_device_properties(
self.device
).total_memory
self.device_capacity_gb = format_to_gb(self.device_capacity)
self.device_capacity_gib = _format_to_gib(self.device_capacity)
self.num_retries = 0
self.num_ooms = 0
self.peak_active_memory = 0
Expand All @@ -67,17 +68,17 @@ def __init__(self, device: str = "cuda:0"):
self.curr_reserved_memory = 0

self.device_reserved_memory_usage = 0
self.device_reserved_memory_gb = 0
self.device_reserved_memory_gib = 0
self.device_reserved_memory_pct = 0

self.device_active_memory_usage = 0
self.device_active_memory_gb = 0
self.device_active_memory_gib = 0
self.device_active_memory_pct = 0

# current stats
self.device_alloc_memory_usage = torch.cuda.memory_allocated(self.device)
self.device_alloc_memory_gb = format_to_gb(self.device_alloc_memory_usage)
self.device_alloc_memory_pct = convert_to_gpu_pct(
self.device_alloc_memory_gib = _format_to_gib(self.device_alloc_memory_usage)
self.device_alloc_memory_pct = _convert_to_gpu_pct(
self.device_alloc_memory_usage, self.device_capacity
)

Expand All @@ -90,10 +91,8 @@ def get_pct_memory(self, memory_num):
pct_memory = round(100 * (pct_memory), 2)
return pct_memory

def get_gb_memory(self, memory_num):
gb_memory = memory_num / _gb_in_bytes
gb_memory = round(gb_memory, 2)
return gb_memory
def get_gib_memory(self, memory_num):
return _format_to_gib(memory_num, precision=2)

def get_current_stats(self, return_data: bool = False):
"""
Expand All @@ -104,21 +103,23 @@ def get_current_stats(self, return_data: bool = False):
curr_mem = torch.cuda.memory_stats(self.device)

self.device_alloc_memory_usage = curr_mem["allocated_bytes.all.current"]
self.device_alloc_memory_gb = format_to_gb(self.device_alloc_memory_usage)
self.device_alloc_memory_pct = convert_to_gpu_pct(
self.device_alloc_memory_gib = _format_to_gib(self.device_alloc_memory_usage)
self.device_alloc_memory_pct = _convert_to_gpu_pct(
self.device_alloc_memory_usage, self.device_capacity
)

self.device_reserved_memory_usage = curr_mem["reserved_bytes.all.current"]
self.device_reserved_memory_gb = format_to_gb(self.device_reserved_memory_usage)
self.device_reserved_memory_pct = convert_to_gpu_pct(
self.device_reserved_memory_gib = _format_to_gib(
self.device_reserved_memory_usage
)
self.device_reserved_memory_pct = _convert_to_gpu_pct(
self.device_reserved_memory_usage, self.device_capacity
)

self.device_active_memory_usage = curr_mem["active_bytes.all.current"]
self.device_active_memory_gb = format_to_gb(self.device_active_memory_usage)
self.device_active_memory_pct = convert_to_gpu_pct(
self.device_active_memory_usage, self.device_capacity
self.device_active_memory_gib = _format_to_gib(self.device_active_memory_usage)
self.device_active_memory_pct = _convert_to_gpu_pct(
self.device_active_memory_usage, self.device_capacity, precision=2
)

display_str = ""
Expand Down Expand Up @@ -179,8 +180,8 @@ def reset_peak_stats(self):

def __str__(self):
_ = self.get_current_stats()
display_str = f"{self.device_name} ({self.device_index}): {self.device_capacity_gb} GB capacity, "
display_str += f"{self.device_alloc_memory_gb} GB in-use, {self.device_alloc_memory_pct}% in-use"
display_str = f"{self.device_name} ({self.device_index}): {self.device_capacity_gib} GiB capacity, "
display_str += f"{self.device_alloc_memory_gib} GiB in-use, {self.device_alloc_memory_pct}% in-use"
return f"{display_str}"


Expand Down
35 changes: 35 additions & 0 deletions torchtrain/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.

from dataclasses import dataclass
from typing import Union

import torch
Expand All @@ -17,3 +18,37 @@ def dist_max(x: Union[int, float], mesh: DeviceMesh) -> float:
def dist_mean(x: Union[int, float], mesh: DeviceMesh) -> float:
tensor = torch.tensor(x).cuda()
return funcol.all_reduce(tensor, reduceOp=c10d.ReduceOp.AVG.name, group=mesh)


@dataclass
class Color:
    """ANSI escape sequences that set the terminal foreground (text) color.

    Each field holds the SGR sequence for one of the eight standard colors;
    ``reset`` restores the terminal's default foreground color.

    The attributes are annotated so that ``@dataclass`` registers them as
    real fields (unannotated class attributes are ignored by the decorator).
    Class-level access such as ``Color.red`` keeps working because every
    field has a default value.
    """

    black: str = "\033[30m"
    red: str = "\033[31m"
    green: str = "\033[32m"
    yellow: str = "\033[33m"
    blue: str = "\033[34m"
    magenta: str = "\033[35m"
    cyan: str = "\033[36m"
    white: str = "\033[37m"
    reset: str = "\033[39m"  # SGR 39: default foreground color


@dataclass
class Background:
    """ANSI escape sequences that set the terminal background color.

    Each field holds the SGR sequence for one of the eight standard colors;
    ``reset`` restores the terminal's default background color.

    The attributes are annotated so that ``@dataclass`` registers them as
    real fields (unannotated class attributes are ignored by the decorator).
    Class-level access such as ``Background.red`` keeps working because
    every field has a default value.
    """

    black: str = "\033[40m"
    red: str = "\033[41m"
    green: str = "\033[42m"
    yellow: str = "\033[43m"
    blue: str = "\033[44m"
    magenta: str = "\033[45m"
    cyan: str = "\033[46m"
    white: str = "\033[47m"
    reset: str = "\033[49m"  # SGR 49: default background color


@dataclass
class Style:
    """ANSI escape sequences that control text intensity.

    ``bright`` and ``dim`` raise or lower the intensity, ``normal`` returns
    to regular intensity, and ``reset`` clears every active attribute
    (colors included).

    The attributes are annotated so that ``@dataclass`` registers them as
    real fields (unannotated class attributes are ignored by the decorator).
    Class-level access such as ``Style.bright`` keeps working because every
    field has a default value.
    """

    bright: str = "\033[1m"
    dim: str = "\033[2m"
    normal: str = "\033[22m"  # SGR 22: neither bright nor dim
    reset: str = "\033[0m"  # SGR 0: reset all attributes
Loading

0 comments on commit d1bf5d6

Please sign in to comment.