Commit 5d95266: Changes done internally at Facebook
baa5235524cdb0c60ff8a448c802ff575122b986 Lu Fang <[email protected]> Eliminate c10::optional usage in vllm_flash_attn

Signed-off-by: Lu Fang <[email protected]>
houseroad committed Jan 16, 2025
1 parent 323b789 commit 5d95266
Showing 1 changed file with 7 additions and 7 deletions.
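
The diff below swaps c10::optional for std::optional in the sparse attention entry points. In recent PyTorch releases c10::optional is an alias of std::optional, so the change is mechanical: the signatures are respelled while call sites and the has_value()/value() accessors stay the same. As a minimal sketch (not part of the commit; the helper add_bias_if_present and its bias handling are hypothetical), the updated parameter style looks like this:

// Minimal sketch, assuming a recent PyTorch/ATen build where
// c10::optional aliases std::optional. The helper add_bias_if_present
// is hypothetical and only illustrates the std::optional<at::Tensor>
// parameter style this commit adopts.
#include <iostream>
#include <optional>

#include <ATen/ATen.h>

// An optional tensor argument: callers pass a tensor or std::nullopt.
at::Tensor add_bias_if_present(const at::Tensor &x,
                               const std::optional<at::Tensor> &bias_) {
    // has_value()/value() behave identically for c10::optional and
    // std::optional, which is what makes the migration mechanical.
    if (bias_.has_value()) {
        return x + bias_.value();
    }
    return x;
}

int main() {
    at::Tensor x = at::ones({2, 3});
    std::cout << add_bias_if_present(x, std::nullopt) << "\n";
    std::cout << add_bias_if_present(x, at::full({2, 3}, 2.0)) << "\n";
    return 0;
}

Because the accessor surface is identical, the surrounding code in flash_api.cpp compiles unchanged; only the spellings in the signatures below differ.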
csrc/flash_attn/flash_api.cpp (14 changes: 7 additions & 7 deletions)
@@ -346,14 +346,14 @@ mha_fwd_sparse(at::Tensor &q, // batch_size x seqlen_q x num_heads x hea
                const at::Tensor &block_offset,
                const at::Tensor &column_count,
                const at::Tensor &column_index,
-               const c10::optional<at::Tensor> &out_, // batch_size x seqlen_q x num_heads x head_size
-               const c10::optional<at::Tensor> &alibi_slopes_, // num_heads or batch_size x num_heads
+               const std::optional<at::Tensor> &out_, // batch_size x seqlen_q x num_heads x head_size
+               const std::optional<at::Tensor> &alibi_slopes_, // num_heads or batch_size x num_heads
                const double p_dropout,
                const double softmax_scale,
                bool is_causal,
                const double softcap,
                const bool return_softmax,
-               c10::optional<at::Generator> gen_) {
+               std::optional<at::Generator> gen_) {

     auto dprops = at::cuda::getCurrentDeviceProperties();
     // bool is_sm75 = dprops->major == 7 && dprops->minor == 5;
@@ -515,11 +515,11 @@ mha_varlen_fwd_sparse(at::Tensor &q, // total_q x num_heads x head_size, total_
                       const at::Tensor &block_offset,
                       const at::Tensor &column_count,
                       const at::Tensor &column_index,
-                      const c10::optional<at::Tensor> &out_, // total_q x num_heads x head_size, total_k := \sum_{i=0}^{b} s_i
+                      const std::optional<at::Tensor> &out_, // total_q x num_heads x head_size, total_k := \sum_{i=0}^{b} s_i
                       const at::Tensor &cu_seqlens_q, // b+1
                       const at::Tensor &cu_seqlens_k, // b+1
-                      const c10::optional<at::Tensor> &seqused_k, // b. If given, only this many elements of each batch element's keys are used.
-                      const c10::optional<at::Tensor> &alibi_slopes_, // num_heads or b x num_heads
+                      const std::optional<at::Tensor> &seqused_k, // b. If given, only this many elements of each batch element's keys are used.
+                      const std::optional<at::Tensor> &alibi_slopes_, // num_heads or b x num_heads
                       int64_t max_seqlen_q,
                       const int64_t max_seqlen_k,
                       const double p_dropout,
@@ -528,7 +528,7 @@ mha_varlen_fwd_sparse(at::Tensor &q, // total_q x num_heads x head_size, total_
                       bool is_causal,
                       const double softcap,
                       const bool return_softmax,
-                      c10::optional<at::Generator> gen_) {
+                      std::optional<at::Generator> gen_) {

     auto dprops = at::cuda::getCurrentDeviceProperties();
     // bool is_sm75 = dprops->major == 7 && dprops->minor == 5;
