Fix CI issue (#11521)
Signed-off-by: Boxiang Wang <[email protected]>
BoxiangW authored Dec 10, 2024
1 parent 8583201 commit d9677ce
Showing 1 changed file with 12 additions and 10 deletions.
22 changes: 12 additions & 10 deletions nemo/collections/llm/api.py
@@ -968,16 +968,18 @@ def _validate_config(
 
     # CP validation
     if trainer.strategy.context_parallel_size > 1:
-        if model.config.seq_length is not None:
-            assert (
-                model.config.seq_length % (trainer.strategy.context_parallel_size * 2) == 0
-            ), 'Sequence length must be divisible by 2 * context parallel size if context parallel is used.'
+        if hasattr(model, "config"):
+            if model.config.seq_length is not None:
+                assert (
+                    model.config.seq_length % (trainer.strategy.context_parallel_size * 2) == 0
+                ), 'Sequence length must be divisible by 2 * context parallel size if context parallel is used.'
 
     # EP validation
     if trainer.strategy.expert_model_parallel_size > 1:
-        assert (
-            model.config.num_moe_experts is not None
-        ), "num_experts must be non None to use expert model parallelism"
-        assert (
-            model.config.num_moe_experts % trainer.strategy.expert_model_parallel_size == 0
-        ), "Number of experts should be a multiple of expert model parallel_size."
+        if hasattr(model, "config"):
+            assert (
+                model.config.num_moe_experts is not None
+            ), "num_experts must be non None to use expert model parallelism"
+            assert (
+                model.config.num_moe_experts % trainer.strategy.expert_model_parallel_size == 0
+            ), "Number of experts should be a multiple of expert model parallel_size."
