Skip to content

Commit

Permalink
sql: add scalar functions (#511)
Browse files Browse the repository at this point in the history
  • Loading branch information
alissa-tung authored Aug 13, 2021
1 parent ad53a0e commit 1fa06c7
Show file tree
Hide file tree
Showing 9 changed files with 310 additions and 19 deletions.
11 changes: 11 additions & 0 deletions hstream-sql/etc/SQL.cf
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,17 @@ ScalarFuncStrlen. ScalarFunc ::= "STRLEN(" ValueExpr ")" ;
ScalarFuncIfNull. ScalarFunc ::= "IFNULL(" ValueExpr "," ValueExpr ")" ;
ScalarFuncNullIf. ScalarFunc ::= "NULLIF(" ValueExpr "," ValueExpr ")" ;

ScalarFuncDateStr. ScalarFunc ::= "DATETOSTRING(" ValueExpr "," ValueExpr ")" ;
ScalarFuncStrDate. ScalarFunc ::= "STRINGTODATE(" ValueExpr "," ValueExpr ")" ;

ScalarFuncSplit. ScalarFunc ::= "SPLIT(" ValueExpr "," ValueExpr ")" ;
ScalarFuncChunksOf. ScalarFunc ::= "CHUNKSOF(" ValueExpr "," ValueExpr ")" ;

ScalarFuncTake. ScalarFunc ::= "TAKE(" ValueExpr "," ValueExpr ")" ;
ScalarFuncTakeEnd. ScalarFunc ::= "TAKEEND(" ValueExpr "," ValueExpr ")" ;
ScalarFuncDrop. ScalarFunc ::= "DROP(" ValueExpr "," ValueExpr ")" ;
ScalarFuncDropEnd. ScalarFunc ::= "DROPEND(" ValueExpr "," ValueExpr ")" ;

ArrayFuncContain. ScalarFunc ::= "ARRAY_CONTAIN(" ValueExpr "," ValueExpr ")" ;
ArrayFuncDistinct. ScalarFunc ::= "ARRAY_DISTINCT(" ValueExpr ")" ;
ArrayFuncExcept. ScalarFunc ::= "ARRAY_EXCEPT(" ValueExpr "," ValueExpr ")" ;
Expand Down
4 changes: 3 additions & 1 deletion hstream-sql/hstream-sql.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ test-suite hstream-sql-test
type: exitcode-stdio-1.0
main-is: Spec.hs
other-modules:
HStream.SQL.CodegenSpec
HStream.SQL.Codegen.ArraySpec
HStream.SQL.Codegen.MathSpec
HStream.SQL.ParseRefineSpec
HStream.SQL.ValidateSpec
hs-source-dirs: test
Expand All @@ -115,6 +116,7 @@ test-suite hstream-sql-test
, hstream-sql
, hstream-common
, hstream-processing
, HUnit
, random ^>=1.2
, scientific
, text
Expand Down
14 changes: 12 additions & 2 deletions hstream-sql/src/HStream/SQL/AST.hs
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,10 @@ data Constant = ConstantNull

data BinaryOp = OpAdd | OpSub | OpMul
| OpAnd | OpOr
| OpContain | OpExcept | OpIntersect | OpRemove | OpUnion | OpArrJoin'
| OpIfNull | OpNullIf
| OpContain | OpExcept | OpIntersect | OpRemove | OpUnion | OpArrJoin'
| OpIfNull | OpNullIf | OpDateStr | OpStrDate
| OpSplit | OpChunksOf
| OpTake | OpTakeEnd | OpDrop | OpDropEnd
deriving (Eq, Show)

data UnaryOp = OpSin | OpSinh | OpAsin | OpAsinh | OpCos | OpCosh
Expand Down Expand Up @@ -141,6 +143,14 @@ instance Refine ValueExpr where -- FIXME: Inconsistent form (Position instead of
(ExprScalarFunc _ (ArrayFuncRemove _ e1 e2)) -> RExprBinOp (trimSpacesPrint expr) OpRemove (refine e1) (refine e2)
(ExprScalarFunc _ (ArrayFuncUnion _ e1 e2)) -> RExprBinOp (trimSpacesPrint expr) OpUnion (refine e1) (refine e2)
(ExprScalarFunc _ (ArrayFuncJoinWith _ e1 e2)) -> RExprBinOp (trimSpacesPrint expr) OpArrJoin' (refine e1) (refine e2)
(ExprScalarFunc _ (ScalarFuncDateStr _ e1 e2)) -> RExprBinOp (trimSpacesPrint expr) OpDateStr (refine e1) (refine e2)
(ExprScalarFunc _ (ScalarFuncStrDate _ e1 e2)) -> RExprBinOp (trimSpacesPrint expr) OpStrDate (refine e1) (refine e2)
(ExprScalarFunc _ (ScalarFuncSplit _ e1 e2)) -> RExprBinOp (trimSpacesPrint expr) OpSplit (refine e1) (refine e2)
(ExprScalarFunc _ (ScalarFuncChunksOf _ e1 e2)) -> RExprBinOp (trimSpacesPrint expr) OpChunksOf (refine e1) (refine e2)
(ExprScalarFunc _ (ScalarFuncTake _ e1 e2)) -> RExprBinOp (trimSpacesPrint expr) OpTake (refine e1) (refine e2)
(ExprScalarFunc _ (ScalarFuncTakeEnd _ e1 e2)) -> RExprBinOp (trimSpacesPrint expr) OpTakeEnd (refine e1) (refine e2)
(ExprScalarFunc _ (ScalarFuncDrop _ e1 e2)) -> RExprBinOp (trimSpacesPrint expr) OpDrop (refine e1) (refine e2)
(ExprScalarFunc _ (ScalarFuncDropEnd _ e1 e2)) -> RExprBinOp (trimSpacesPrint expr) OpDropEnd (refine e1) (refine e2)
(ExprInt _ n) -> RExprConst (trimSpacesPrint expr) (ConstantInt . fromInteger . refine $ n) -- WARNING: May lose presision
(ExprNum _ n) -> RExprConst (trimSpacesPrint expr) (ConstantNum $ refine n)
(ExprString _ s) -> RExprConst (trimSpacesPrint expr) (ConstantString s)
Expand Down
70 changes: 70 additions & 0 deletions hstream-sql/src/HStream/SQL/Internal/Codegen.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,13 @@ import HStream.SQL.AST
import HStream.SQL.Exception (SomeRuntimeException (..),
SomeSQLException (..),
throwSQLException)
import HStream.Utils
import qualified Prelude as Prelude
import RIO
import Text.StringRandom (stringRandomIO)
import qualified Z.Data.CBytes as ZCB
import qualified Z.Data.Text as ZT
import Z.IO.Time

--------------------------------------------------------------------------------
getFieldByName :: HasCallStack => Object -> Text -> Value
Expand Down Expand Up @@ -88,7 +93,46 @@ binOpOnValue OpUnion _ Null = Null
binOpOnValue OpUnion (Array xs) (Array ys) = Array (nub $ xs <> ys)
binOpOnValue OpArrJoin' (Array xs) (String s) = String (arrJoinPrim xs (Just s))
binOpOnValue OpIfNull Null x = x
binOpOnValue OpIfNull x _ = x
binOpOnValue OpNullIf x y = if x == y then Null else x
binOpOnValue OpDateStr (Number date) (String fmt) = String $ dateToStrGMT date fmt
binOpOnValue OpStrDate (String date) (String fmt) = Number $ strToDateGMT date fmt
binOpOnValue OpSplit Null _ = Null
binOpOnValue OpSplit _ Null = Null
binOpOnValue OpSplit (String x) (String xs) = Array . V.fromList $ String <$>
(if T.length x == 1
then T.split (== T.head x)
else T.splitOn x) xs
binOpOnValue OpChunksOf _ Null = Null
binOpOnValue OpChunksOf (Number n) (String xs) = Array . V.fromList $ String <$>
T.chunksOf (case toBoundedInteger n of
Just x -> x
_ -> throwSQLException CodegenException Nothing
("Operation OpChunksOf on chunks of size " ++ show n ++ " is not supported")) xs
binOpOnValue OpTake _ Null = Null
binOpOnValue OpTake (Number n) (String xs) = String $
T.take (case toBoundedInteger n of
Just x -> x
_ -> throwSQLException CodegenException Nothing
("Operation OpTake on size " ++ show n ++ " is not supported")) xs
binOpOnValue OpTakeEnd _ Null = Null
binOpOnValue OpTakeEnd (Number n) (String xs) = String $
T.takeEnd (case toBoundedInteger n of
Just x -> x
_ -> throwSQLException CodegenException Nothing
("Operation OpTakeEnd on size " ++ show n ++ " is not supported")) xs
binOpOnValue OpDrop _ Null = Null
binOpOnValue OpDrop (Number n) (String xs) = String $
T.drop (case toBoundedInteger n of
Just x -> x
_ -> throwSQLException CodegenException Nothing
("Operation OpDrop on size " ++ show n ++ " is not supported")) xs
binOpOnValue OpDropEnd _ Null = Null
binOpOnValue OpDropEnd (Number n) (String xs) = String $
T.dropEnd (case toBoundedInteger n of
Just x -> x
_ -> throwSQLException CodegenException Nothing
("Operation OpDropEnd on size " ++ show n ++ " is not supported")) xs
binOpOnValue op v1 v2 =
throwSQLException CodegenException Nothing ("Operation " <> show op <> " on " <> show v1 <> " and " <> show v2 <> " is not supported")

Expand Down Expand Up @@ -230,3 +274,29 @@ arrJoinPrim xs delimiterM | null xs = T.empty
ifNull :: Value -> Value -> Value
ifNull Null = id
ifNull x = const x

strToDateGMT :: T.Text -> T.Text -> Scientific
strToDateGMT date fmt = parseSystemTimeGMT (case timeFmt fmt of
Just x -> x
_ -> throwSQLException CodegenException Nothing
("Operation OpStrDate on time format " <> show fmt <> " is not supported")) (textToCBytes date)
& systemSeconds & Prelude.toInteger & flip scientific 0

dateToStrGMT :: Scientific -> T.Text -> T.Text
dateToStrGMT date fmt =
let sysTime = MkSystemTime (case toBoundedInteger date of
Just x -> x
_ -> throwSQLException CodegenException Nothing "Impossible happened...") 0
in formatSystemTimeGMT (case timeFmt fmt of
Just x -> x
_ -> throwSQLException CodegenException Nothing
("Operation OpDateStr on time format " <> show fmt <> " is not supported")) sysTime
& cBytesToText

timeFmt :: T.Text -> Maybe ZCB.CBytes
timeFmt fmt
| textToCBytes fmt == simpleDateFormat = Just simpleDateFormat
| textToCBytes fmt == iso8061DateFormat = Just iso8061DateFormat
| textToCBytes fmt == webDateFormat = Just webDateFormat
| textToCBytes fmt == mailDateFormat = Just mailDateFormat
| otherwise = Nothing
32 changes: 32 additions & 0 deletions hstream-sql/src/HStream/SQL/Validate/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,14 @@ instance HaveValueExpr ScalarFunc where
(ArrayFuncMax _ e) -> e
(ArrayFuncMin _ e) -> e
(ArrayFuncSort _ e) -> e
(ScalarFuncDateStr _ e _) -> e
(ScalarFuncStrDate _ e _) -> e
(ScalarFuncSplit _ e _) -> e
(ScalarFuncChunksOf _ e _) -> e
(ScalarFuncTake _ e _) -> e
(ScalarFuncTakeEnd _ e _) -> e
(ScalarFuncDrop _ e _) -> e
(ScalarFuncDropEnd _ e _) -> e

class HavePos a where
getPos :: a -> BNFC'Position
Expand Down Expand Up @@ -131,6 +139,14 @@ instance HavePos ScalarFunc where
(ArrayFuncMax pos _) -> pos
(ArrayFuncMin pos _) -> pos
(ArrayFuncSort pos _) -> pos
(ScalarFuncDateStr pos _ _) -> pos
(ScalarFuncStrDate pos _ _) -> pos
(ScalarFuncSplit pos _ _) -> pos
(ScalarFuncChunksOf pos _ _) -> pos
(ScalarFuncTake pos _ _) -> pos
(ScalarFuncTakeEnd pos _ _) -> pos
(ScalarFuncDrop pos _ _) -> pos
(ScalarFuncDropEnd pos _ _) -> pos

instance HavePos SearchCond where
getPos cond = case cond of
Expand Down Expand Up @@ -214,6 +230,14 @@ getScalarFuncType f = case f of
(ArrayFuncMax _ _) -> 0b0100_0000
(ArrayFuncMin _ _) -> 0b0100_0000
(ArrayFuncSort _ _) -> 0b0100_0000
(ScalarFuncDateStr _ _ _) -> 0b0010_0000
(ScalarFuncStrDate _ _ _) -> 0b0000_0100
(ScalarFuncSplit _ _ _) -> 0b0100_0000
(ScalarFuncChunksOf _ _ _) -> 0b0100_0000
(ScalarFuncTake _ _ _) -> 0b0010_0000
(ScalarFuncTakeEnd _ _ _) -> 0b0010_0000
(ScalarFuncDrop _ _ _) -> 0b0010_0000
(ScalarFuncDropEnd _ _ _) -> 0b0010_0000

getScalarArgType :: ScalarFunc -> Word8
getScalarArgType f = case f of
Expand Down Expand Up @@ -270,6 +294,14 @@ getScalarArgType f = case f of
(ArrayFuncMax _ _) -> anyMask
(ArrayFuncMin _ _) -> anyMask
(ArrayFuncSort _ _) -> anyMask
(ScalarFuncDateStr _ _ _) -> numMask
(ScalarFuncStrDate _ _ _) -> stringMask
(ScalarFuncSplit _ _ _) -> stringMask
(ScalarFuncChunksOf _ _ _) -> stringMask
(ScalarFuncTake _ _ _) -> anyMask
(ScalarFuncTakeEnd _ _ _) -> anyMask
(ScalarFuncDrop _ _ _) -> anyMask
(ScalarFuncDropEnd _ _ _) -> anyMask

isTypeInt, isTypeFloat, isTypeNum, isTypeOrd, isTypeBool, isTypeString :: Word8 -> Bool
isTypeInt n = n .&. intMask /= 0
Expand Down
104 changes: 104 additions & 0 deletions hstream-sql/test/HStream/SQL/Codegen/ArraySpec.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}

module HStream.SQL.Codegen.ArraySpec where

import qualified Data.Aeson as A
import Data.Function
import HStream.SQL.AST
import HStream.SQL.Internal.Codegen
import Test.Hspec

spec :: Spec
spec = describe "Array Scalar Functions" do

it "ARRAY_DISTINCT__0" do
unaryOpOnValue OpDistinct (A.Number <$> [1, 1, 2, 3, 1, 2] & A.Array)
`shouldBe` (A.Number <$> [1, 2, 3] & A.Array)

it "ARRAY_DISTINCT__1" do
unaryOpOnValue OpDistinct ([A.String "apple", A.String "apple", A.Null, A.String "cherry"] & A.Array)
`shouldBe` ([A.String "apple", A.Null, A.String "cherry"] & A.Array)

it "ARRAY_EXCEPT__0" do
binOpOnValue OpExcept (A.Number <$> [1, 2, 3, 1, 2] & A.Array) (A.Number <$> [2, 3] & A.Array)
`shouldBe` A.Array [A.Number 1]

it "ARRAY_EXCEPT__1" do
binOpOnValue OpExcept (A.Array [A.String "apple", A.String "apple", A.Null, A.String "cherry"])
(A.Array [A.String "cherry"]) `shouldBe` A.Array [A.String "apple", A.Null]

it "ARRAY_INTERSECT__0" do
(binOpOnValue OpIntersect `on` (\x -> A.Number <$> x & A.Array)) [1, 2, 3, 1, 2] [2, 1]
`shouldBe` (A.Number <$> [1, 2] & A.Array)

it "ARRAY_INTERSECT__1" do
binOpOnValue OpIntersect (A.Array [A.String "apple", A.String "apple", A.Null, A.String "cherry"])
(A.Array [A.String "apple"]) `shouldBe` A.Array [A.String "apple"]

--- derived decEq
it "ARRAY_MAX__0" do
unaryOpOnValue OpArrMax (A.Array [A.Number (-1), A.Number 2, A.Null, A.Number 0])
`shouldBe` A.Null

it "ARRAY_MAX__1" do
unaryOpOnValue OpArrMax (A.Array [A.Bool True, A.Bool False])
`shouldBe` A.Bool True

it "ARRAY_MAX__2" do
unaryOpOnValue OpArrMax (A.Array [A.Number 23, A.Number 24, A.String "r"])
`shouldBe` A.Number 24

it "ARRAY_MAX__3" do
unaryOpOnValue OpArrMax (A.Array [A.String "Foo", A.String "Bar", A.String "baz"])
`shouldBe` A.String "baz"

it "ARRAY_MIN__0" do
unaryOpOnValue OpArrMin (A.Array [A.Number (-1), A.Number 2, A.Null, A.Number 0])
`shouldBe` A.Number (-1)

it "ARRAY_MIN__1" do
unaryOpOnValue OpArrMin (A.Array [A.Bool True, A.Bool False])
`shouldBe` A.Bool False

it "ARRAY_MIN__2" do
unaryOpOnValue OpArrMin (A.Array [A.Number 23, A.Number 24, A.String "r"])
`shouldBe` A.String "r"

it "ARRAY_MIN__3" do
unaryOpOnValue OpArrMin (A.Array [A.String "Foo", A.String "Bar", A.String "baz"])
`shouldBe` A.String "Bar"

it "ARRAY_REMOVE__0" do
binOpOnValue OpRemove (A.Number <$> [1, 2, 3, 2, 1] & A.Array) (A.Number 2)
`shouldBe` (A.Number <$> [1, 3, 1] & A.Array)

it "ARRAY_REMOVE__1" do
binOpOnValue OpRemove (A.Array [A.Bool False, A.Null, A.Bool True, A.Bool True]) (A.Bool False)
`shouldBe` A.Array [A.Null, A.Bool True, A.Bool True]

it "ARRAY_REMOVE__2" do
binOpOnValue OpRemove (A.Array [A.String "Foo", A.String "Bar", A.Null, A.String "baz"]) A.Null
`shouldBe` A.Array [A.String "Foo", A.String "Bar", A.String "baz"]

it "ARRAY_SORT__0" do
unaryOpOnValue OpSort (A.Array [A.Number (-1), A.Number 2, A.Null, A.Number 0])
`shouldBe` A.Array [A.Number (-1), A.Number 0, A.Number 2, A.Null]

it "ARRAY_SORT__1" do
unaryOpOnValue OpSort (A.Array [A.Bool False, A.Null, A.Bool True])
`shouldBe` A.Array [A.Bool False, A.Bool True, A.Null]

it "ARRAY_SORT__2" do
unaryOpOnValue OpSort (A.Array [A.String "Foo", A.String "Bar", A.Null, A.String "baz"])
`shouldBe` A.Array [A.String "Bar", A.String "Foo", A.String "baz", A.Null]

it "ARRAY_UNION__0" do
(binOpOnValue OpUnion `on` (\xs -> A.Number <$> xs & A.Array))
[1, 2, 3, 1, 2] [4, 1] `shouldBe` (\xs -> A.Number <$> xs & A.Array) [1, 2, 3, 4]

it "ARRAY_UNION__1" do
binOpOnValue OpUnion (A.Array [A.String "apple", A.String "apple", A.Null, A.String "cherry"])
(A.Array [A.String "cherry"]) `shouldBe` A.Array [A.String "apple", A.Null, A.String "cherry"]
29 changes: 29 additions & 0 deletions hstream-sql/test/HStream/SQL/Codegen/MathSpec.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}

module HStream.SQL.Codegen.MathSpec where

import qualified Data.Aeson as A
import Data.Function
import Data.Scientific
import HStream.SQL.AST
import HStream.SQL.Internal.Codegen
import Test.HUnit
import Test.Hspec

spec :: Spec
spec = describe "Math Scalar Functions" do

-- Arith Expr

it "binary add" do
binOpOnValue OpAdd (A.Number 1) (A.Number 2) `shouldBe` (A.Number 3)

it "floor" do
unaryOpOnValue OpFloor (A.Number 1.5)
`shouldBe` A.Number 1

it "ceil" do
unaryOpOnValue OpCeil (A.Number 1.5) `shouldBe` A.Number 2
14 changes: 0 additions & 14 deletions hstream-sql/test/HStream/SQL/CodegenSpec.hs

This file was deleted.

Loading

0 comments on commit 1fa06c7

Please sign in to comment.