Update Patch v0.5.1
seungahdev committed Aug 20, 2024
1 parent 8e345e3 commit 9fa25be
Showing 4 changed files with 11 additions and 6 deletions.
2 changes: 1 addition & 1 deletion README.md

````diff
@@ -116,7 +116,7 @@ The command line arguments mean:
 - **`mode`**: Quantization techniques to apply. You can use `fp8`, `int8`.
 - **`pedantic-level`**: Controls the accuracy-latency trade-off. A higher pedantic level ensures a more accurate representation of the model but increases the quantization processing time. Defaults to 1.
 - **`device`**: Device to run the quantization process. Defaults to "cuda:0".
-- **`offload`**: When enabled, this option significantly reduces GPU memory usage by offloading model layers onto CPU RAM. Defaults to true.
+- **`offload`**: When enabled, this option significantly reduces GPU memory usage by offloading model layers onto CPU RAM. Defaults to False.
 
 ## Example: Run FP8 quantization with Meta-Llama-3-8B-Instruct
 ```bash
````
2 changes: 1 addition & 1 deletion pyproject.toml

```diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "friendli-model-optimizer"
-version = "0.5.0"
+version = "0.5.1"
 authors = [
   { name = "FriendliAI teams", email = "[email protected]" },
 ]
```
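Since only the version string changed, a quick way to verify the bump after installing from this revision is to read the distribution metadata. A minimal check using only the standard library; it assumes the package has been installed, e.g. via `pip install .`:

```python
# Verify the installed package version matches the bump above.
# Assumes the package is installed from this revision (e.g. `pip install .`).
from importlib.metadata import version

print(version("friendli-model-optimizer"))  # expected output: 0.5.1
```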
6 changes: 4 additions & 2 deletions src/fmo/main.py

```diff
@@ -79,7 +79,7 @@ def quantize(
         False,
         "--offload",
         help=(
-            "When enabled, significantly reduces GPU memory usage by offloading model layers onto CPU RAM. Defaults to true."
+            "When enabled, significantly reduces GPU memory usage by offloading model layers onto CPU RAM. Defaults to False."
         ),
     ),
     seed: Optional[int] = typer.Option(
@@ -138,7 +138,9 @@ def quantize(
         os.mkdir(output_dir)
 
     dataset = safe_load_datasets(
-        dataset_name_or_path=dataset_name_or_path, split_name=dataset_split_name
+        dataset_name_or_path=dataset_name_or_path,
+        split_name=dataset_split_name,
+        cache_dir=cache_dir,
     )
     tokenizer = get_tokenizer(
         model_name_or_path=model_name_or_path, cache_dir=cache_dir
```
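The help-text fix above matters because the flag's actual default is `False` (the first argument to `typer.Option`). Below is a minimal sketch of how a boolean flag with this shape behaves in typer; the command is a toy stand-in, not the real `quantize` signature, and the `seed` option's help text is assumed:

```python
# Toy sketch of a typer boolean flag shaped like --offload above.
# Not the real fmo `quantize` entry point; only the flag wiring is shown.
from typing import Optional

import typer

app = typer.Typer()


@app.command()
def quantize(
    offload: bool = typer.Option(
        False,  # actual default, matching the corrected "Defaults to False." text
        "--offload",
        help=(
            "When enabled, significantly reduces GPU memory usage by "
            "offloading model layers onto CPU RAM. Defaults to False."
        ),
    ),
    seed: Optional[int] = typer.Option(None, "--seed", help="Random seed."),
) -> None:
    typer.echo(f"offload={offload}, seed={seed}")


if __name__ == "__main__":
    app()
```

Run plain, the script prints `offload=False`; passing `--offload` flips it to `True`, which is exactly what the corrected help string and README now state.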
7 changes: 5 additions & 2 deletions src/fmo/utils/dataset.py

```diff
@@ -17,6 +17,7 @@
 def safe_load_datasets(
     dataset_name_or_path: str,
     split_name: Optional[str],
+    cache_dir: Optional[str] = None,
 ) -> datasets.Dataset:
     """Load dataset from calibration dataset config."""
     try:
@@ -28,11 +29,13 @@ def safe_load_datasets(
     else:
         data_name_parts = dataset_name_or_path.split(":")
         if len(data_name_parts) == 1:
-            dataset = datasets.load_dataset(dataset_name_or_path, split=split_name)
+            dataset = datasets.load_dataset(
+                dataset_name_or_path, split=split_name, cache_dir=cache_dir
+            )
         elif len(data_name_parts) == 2:
             data_name, subset_name = data_name_parts
             dataset = datasets.load_dataset(
-                data_name, subset_name, split=split_name
+                data_name, subset_name, split=split_name, cache_dir=cache_dir
             )
         else:
             raise QuantizationError(
```
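The branch being patched implements a small `name:subset` convention: a bare dataset name goes straight to `datasets.load_dataset`, a `name:subset` spec is split on the colon, and the new `cache_dir` parameter is now forwarded in both cases. A condensed sketch of that logic under the same convention; the helper name and `ValueError` are stand-ins for the module's `safe_load_datasets` and `QuantizationError`:

```python
# Condensed sketch of the patched dataset lookup; names are stand-ins.
from typing import Optional

import datasets


def load_calibration_dataset(
    dataset_name_or_path: str,
    split_name: Optional[str] = None,
    cache_dir: Optional[str] = None,
) -> datasets.Dataset:
    parts = dataset_name_or_path.split(":")
    if len(parts) == 1:
        # Bare dataset name, e.g. "wikitext".
        return datasets.load_dataset(
            dataset_name_or_path, split=split_name, cache_dir=cache_dir
        )
    if len(parts) == 2:
        # "name:subset" form, e.g. "wikitext:wikitext-2-raw-v1".
        data_name, subset_name = parts
        return datasets.load_dataset(
            data_name, subset_name, split=split_name, cache_dir=cache_dir
        )
    raise ValueError(
        f"Expected 'name' or 'name:subset', got {dataset_name_or_path!r}"
    )
```

With `cache_dir` threaded through, repeated quantization runs reuse the local Hugging Face cache instead of re-downloading the calibration dataset each time.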
