
Commit

add lightllm_vllm_kernel (#576)
WANDY666 authored Oct 24, 2024
1 parent b1bde26 commit 789637f
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions lightllm/models/deepseek2/layer_infer/fused_moe.py
@@ -27,9 +27,13 @@
 import triton
 import triton.language as tl
 from lightllm.utils.log_utils import init_logger
-import lightllm.models.deepseek2.layer_infer._custom_ops as ops
 from lightllm.models.llama.triton_kernel.silu_and_mul import silu_and_mul_fwd
 
+try:
+    from lightllm_vllm_kernel import moe_align_block_size as moe_align_block_size_kernel
+except ImportError:
+    from lightllm.models.deepseek2.layer_infer._custom_ops import moe_align_block_size as moe_align_block_size_kernel
+
 
 logger = init_logger(__name__)

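For context, the hunk above makes the compiled lightllm_vllm_kernel package an optional dependency: its kernel is imported when installed, and the bundled _custom_ops implementation is used otherwise. A minimal, hypothetical sketch of that fallback-import pattern follows; the package and function names below are placeholders, not code from this commit.

# Hypothetical sketch of the optional-kernel fallback pattern (placeholder
# names; "some_optional_ext" is assumed to be absent, so the except branch runs).
def _moe_align_fallback(x):
    # Stand-in for the always-available reference implementation.
    return x

try:
    from some_optional_ext import moe_align as _moe_align_impl  # fast compiled path
except ImportError:
    _moe_align_impl = _moe_align_fallback  # graceful fallback, no hard failure

print(_moe_align_impl("runs with whichever backend is available"))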
@@ -218,7 +222,7 @@ def moe_align_block_size(
     max_num_m_blocks = triton.cdiv(max_num_tokens_padded, block_size)
     expert_ids = torch.empty((max_num_m_blocks,), dtype=torch.int32, device=topk_ids.device)
     num_tokens_post_pad = torch.empty((1), dtype=torch.int32, device=topk_ids.device)
-    ops.moe_align_block_size(topk_ids, num_experts, block_size, sorted_ids, expert_ids, num_tokens_post_pad)
+    moe_align_block_size_kernel(topk_ids, num_experts, block_size, sorted_ids, expert_ids, num_tokens_post_pad)
     return sorted_ids, expert_ids, num_tokens_post_pad
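A hedged usage sketch of the wrapper updated in the second hunk follows. The (topk_ids, block_size, num_experts) argument order and the output semantics are assumptions based on the upstream fused-MoE code this file is adapted from; only the function name and the lines shown above come from the diff.

# Hedged usage sketch (not part of the commit); assumes the wrapper keeps the
# (topk_ids, block_size, num_experts) signature of the upstream fused-MoE code.
import torch
from lightllm.models.deepseek2.layer_infer.fused_moe import moe_align_block_size

num_tokens, topk, num_experts, block_size = 4, 2, 8, 16
# Router output: the top-k expert indices chosen for each token.
topk_ids = torch.randint(num_experts, (num_tokens, topk), dtype=torch.int32, device="cuda")

sorted_ids, expert_ids, num_tokens_post_pad = moe_align_block_size(topk_ids, block_size, num_experts)
# sorted_ids: token slots regrouped by expert, padded to multiples of block_size.
# expert_ids: the expert assigned to each block of block_size slots.
# num_tokens_post_pad: one-element tensor with the padded slot count.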

