From 5d95266f04c585a0f2be7140f5ba275ea0895f31 Mon Sep 17 00:00:00 2001
From: Lu Fang
Date: Wed, 15 Jan 2025 21:47:00 -0800
Subject: [PATCH] Changes done internally at Facebook

baa5235524cdb0c60ff8a448c802ff575122b986 Lu Fang Eliminate c10::optional usage in vllm_flash_attn

Signed-off-by: Lu Fang
---
 csrc/flash_attn/flash_api.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/csrc/flash_attn/flash_api.cpp b/csrc/flash_attn/flash_api.cpp
index 9bd46a424..fc32f8675 100644
--- a/csrc/flash_attn/flash_api.cpp
+++ b/csrc/flash_attn/flash_api.cpp
@@ -346,14 +346,14 @@ mha_fwd_sparse(at::Tensor &q,  // batch_size x seqlen_q x num_heads x hea
                const at::Tensor &block_offset,
                const at::Tensor &column_count,
                const at::Tensor &column_index,
-               const c10::optional<at::Tensor> &out_,           // batch_size x seqlen_q x num_heads x head_size
-               const c10::optional<at::Tensor> &alibi_slopes_,  // num_heads or batch_size x num_heads
+               const std::optional<at::Tensor> &out_,           // batch_size x seqlen_q x num_heads x head_size
+               const std::optional<at::Tensor> &alibi_slopes_,  // num_heads or batch_size x num_heads
                const double p_dropout,
                const double softmax_scale,
                bool is_causal,
                const double softcap,
                const bool return_softmax,
-               c10::optional<at::Generator> gen_) {
+               std::optional<at::Generator> gen_) {
 
     auto dprops = at::cuda::getCurrentDeviceProperties();
     // bool is_sm75 = dprops->major == 7 && dprops->minor == 5;
@@ -515,11 +515,11 @@ mha_varlen_fwd_sparse(at::Tensor &q,  // total_q x num_heads x head_size, total_
                       const at::Tensor &block_offset,
                       const at::Tensor &column_count,
                       const at::Tensor &column_index,
-                      const c10::optional<at::Tensor> &out_,  // total_q x num_heads x head_size, total_k := \sum_{i=0}^{b} s_i
+                      const std::optional<at::Tensor> &out_,  // total_q x num_heads x head_size, total_k := \sum_{i=0}^{b} s_i
                       const at::Tensor &cu_seqlens_q,  // b+1
                       const at::Tensor &cu_seqlens_k,  // b+1
-                      const c10::optional<at::Tensor> &seqused_k,  // b. If given, only this many elements of each batch element's keys are used.
-                      const c10::optional<at::Tensor> &alibi_slopes_,  // num_heads or b x num_heads
+                      const std::optional<at::Tensor> &seqused_k,  // b. If given, only this many elements of each batch element's keys are used.
+                      const std::optional<at::Tensor> &alibi_slopes_,  // num_heads or b x num_heads
                       int64_t max_seqlen_q,
                       const int64_t max_seqlen_k,
                       const double p_dropout,
@@ -528,7 +528,7 @@ mha_varlen_fwd_sparse(at::Tensor &q,  // total_q x num_heads x head_size, total_
                       bool is_causal,
                       const double softcap,
                       const bool return_softmax,
-                      c10::optional<at::Generator> gen_) {
+                      std::optional<at::Generator> gen_) {
 
     auto dprops = at::cuda::getCurrentDeviceProperties();
     // bool is_sm75 = dprops->major == 7 && dprops->minor == 5;