From 22fa5ae34d70bc58d32dae25cfeca09178e46398 Mon Sep 17 00:00:00 2001
From: Jolan Rensen
Date: Sat, 15 Jun 2024 16:26:12 +0200
Subject: [PATCH] added spark connect example which does not yet work with kotlin-spark-api

---
 buildSrc/src/main/kotlin/Dependencies.kt      |  2 +
 gradle/bootstraps/compiler-plugin.jar         | Bin 47032 -> 47033 bytes
 gradle/bootstraps/gradle-plugin.jar           | Bin 9347 -> 9347 bytes
 .../kotlinx/spark/api/jupyter/Integration.kt  |  5 +-
 kotlin-spark-api/build.gradle.kts             |  2 +-
 .../jetbrains/kotlinx/spark/api/RddTest.kt    |  6 +-
 settings.gradle.kts                           |  3 +
 spark-connect-examples/build.gradle.kts       | 60 ++++++++++++++++++
 .../jetbrains/kotlinx/spark/examples/Main.kt  | 27 ++++++++
 9 files changed, 98 insertions(+), 7 deletions(-)
 create mode 100644 spark-connect-examples/build.gradle.kts
 create mode 100644 spark-connect-examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Main.kt

diff --git a/buildSrc/src/main/kotlin/Dependencies.kt b/buildSrc/src/main/kotlin/Dependencies.kt
index 20fce75d..d19181c8 100644
--- a/buildSrc/src/main/kotlin/Dependencies.kt
+++ b/buildSrc/src/main/kotlin/Dependencies.kt
@@ -4,6 +4,8 @@ object Dependencies : Dsl<Dependencies> {
     inline val scalaLibrary get() = "org.scala-lang:scala-library:${Versions.scala}"
     inline val kotlinxHtml get() = "org.jetbrains.kotlinx:kotlinx-html-jvm:${Versions.kotlinxHtml}"
     inline val sparkSql get() = "org.apache.spark:spark-sql_${Versions.scalaCompat}:${Versions.spark}"
+    inline val sparkSqlApi get() = "org.apache.spark:spark-sql-api_${Versions.scalaCompat}:${Versions.spark}"
+    inline val sparkConnectClient get() = "org.apache.spark:spark-connect-client-jvm_${Versions.scalaCompat}:${Versions.spark}"
     inline val sparkMl get() = "org.apache.spark:spark-mllib_${Versions.scalaCompat}:${Versions.spark}"
     inline val sparkStreaming get() = "org.apache.spark:spark-streaming_${Versions.scalaCompat}:${Versions.spark}"
     inline val hadoopClient get() = "org.apache.hadoop:hadoop-client:${Versions.hadoop}"
diff --git a/gradle/bootstraps/compiler-plugin.jar b/gradle/bootstraps/compiler-plugin.jar
index fa1826de8afb94e8b131e9df9cf82993e8643427..6de5e4692c87e16ecd876e8d19245fbcd3135f33 100644
GIT binary patch
[base85-encoded binary deltas for compiler-plugin.jar and gradle-plugin.jar omitted]
diff --git a/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt
index 715fa94a..30b9b27b 100644
--- a/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt
+++ b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt
@@ -162,10 +162,11 @@ abstract class Integration(private val notebook: Notebook, private val options:
     }

     beforeCellExecution {
-        if (scalaCompatVersion.toDouble() >= 2.13)
+        if (scalaCompatVersion.toDouble() >= 2.13) {
             execute("scala.`Console\$`.`MODULE\$`.setOutDirect(System.out)")
-        else
+        } else {
             execute("""scala.Console.setOut(System.out)""")
+        }
         beforeCellExecution()
     }
diff --git a/kotlin-spark-api/build.gradle.kts b/kotlin-spark-api/build.gradle.kts
index 812af551..9e0097d7 100644
--- a/kotlin-spark-api/build.gradle.kts
+++ b/kotlin-spark-api/build.gradle.kts
@@ -42,7 +42,7 @@ dependencies {

     // https://github.com/FasterXML/jackson-bom/issues/52
     if (Versions.spark == "3.3.1") implementation(jacksonDatabind)
-    if (Versions.sparkConnect) TODO("unsupported for now")
+    // if (Versions.sparkConnect) TODO("unsupported for now")

     implementation(
         kotlinStdLib,
diff --git a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/RddTest.kt b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/RddTest.kt
index 7bd1ca7b..51a97c3d 100644
--- a/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/RddTest.kt
+++ b/kotlin-spark-api/src/test/kotlin/org/jetbrains/kotlinx/spark/api/RddTest.kt
@@ -1,14 +1,12 @@
 package org.jetbrains.kotlinx.spark.api

 import io.kotest.core.spec.style.ShouldSpec
-import io.kotest.core.spec.style.Test
-import io.kotest.core.test.TestScope
 import io.kotest.matchers.collections.shouldContainAll
 import io.kotest.matchers.shouldBe
 import org.apache.spark.api.java.JavaRDD
-import org.jetbrains.kotlinx.spark.api.tuples.*
+import org.jetbrains.kotlinx.spark.api.tuples.X
+import org.jetbrains.kotlinx.spark.api.tuples.t
 import scala.Tuple2
-import java.io.Serializable

 class RddTest : ShouldSpec({
     context("RDD extension functions") {
diff --git a/settings.gradle.kts b/settings.gradle.kts
index 98776e06..07822dec 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -21,9 +21,11 @@ gradleEnterprise {
 val spark: String by settings
 val scala: String by settings
 val skipScalaOnlyDependent: String by settings
+val sparkConnect: String by settings
 System.setProperty("spark", spark)
 System.setProperty("scala", scala)
 System.setProperty("skipScalaOnlyDependent", skipScalaOnlyDependent)
+System.setProperty("sparkConnect", sparkConnect)

 val scalaCompat
     get() = scala.substringBeforeLast('.')
@@ -37,6 +39,7 @@ include("scala-tuples-in-kotlin")
 include("kotlin-spark-api")
 include("jupyter")
 include("examples")
+include("spark-connect-examples")
 include("compiler-plugin")
 include("gradle-plugin")
diff --git a/spark-connect-examples/build.gradle.kts b/spark-connect-examples/build.gradle.kts
new file mode 100644
index 00000000..c1f20c0a
--- /dev/null
+++ b/spark-connect-examples/build.gradle.kts
@@ -0,0 +1,60 @@
+import org.jetbrains.kotlin.gradle.dsl.JvmTarget
+
+plugins {
+    // Needs to be installed in the local maven repository or have the bootstrap jar on the classpath
+    id("org.jetbrains.kotlinx.spark.api")
+    kotlin("jvm")
+    application
+}
+
+// run with `./gradlew run`
+application {
+    mainClass = "org.jetbrains.kotlinx.spark.examples.MainKt"
+
+    // workaround for java 17
+    applicationDefaultJvmArgs = listOf("--add-opens", "java.base/java.nio=ALL-UNNAMED")
+}
+
+kotlinSparkApi {
+    enabled = true
+    sparkifyAnnotationFqNames = listOf("org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify")
+}
+
+group = Versions.groupID
+version = Versions.project
+
+repositories {
+    mavenLocal()
+    mavenCentral()
+}
+
+dependencies {
+    Projects {
+        implementation(
+            // TODO kotlinSparkApi,
+        )
+    }
+
+    Dependencies {
+
+        // IMPORTANT!
+        compileOnly(sparkSqlApi)
+        implementation(sparkConnectClient)
+    }
+}
+
+// spark-connect seems to work well with java 17 as client and java 1.8 as server
+// also set gradle and your project sdk to java 17
+kotlin {
+    jvmToolchain {
+        languageVersion = JavaLanguageVersion.of(17)
+    }
+    compilerOptions {
+        jvmTarget = JvmTarget.JVM_17
+    }
+}
+
+tasks.withType<JavaCompile> {
+    sourceCompatibility = JavaVersion.VERSION_17.toString()
+    targetCompatibility = JavaVersion.VERSION_17.toString()
+}
diff --git a/spark-connect-examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Main.kt b/spark-connect-examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Main.kt
new file mode 100644
index 00000000..790bad24
--- /dev/null
+++ b/spark-connect-examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Main.kt
@@ -0,0 +1,27 @@
+package org.jetbrains.kotlinx.spark.examples
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.connect.client.REPLClassDirMonitor
+
+// run with `./gradlew run` or set VM options: "--add-opens=java.base/java.nio=ALL-UNNAMED" in the IDE
+fun main() {
+    val spark =
+        SparkSession
+            .builder()
+            .remote("sc://localhost")
+            .create()
+
+    val classFinder = REPLClassDirMonitor("/mnt/data/Projects/kotlin-spark-api/spark-connect-examples/build/classes")
+    spark.registerClassFinder(classFinder)
+    spark.addArtifact("/mnt/data/Projects/kotlin-spark-api/spark-connect-examples/build/libs/spark-connect-examples-2.0.0-SNAPSHOT.jar")
+
+    spark.sql("select 1").show()
+
+    spark.stop()
+}
+
+//@Sparkify
+//data class Person(
+//    val name: String,
+//    val age: Int,
+//)
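
A minimal companion sketch, not part of the patch above: it shows a few plain DataFrame operations that the declared `sparkConnectClient` dependency already supports over Spark Connect, and notes in comments the `@Sparkify` end state that, per the commit subject, does not yet work with kotlin-spark-api. The `sc://localhost` address matches Main.kt; `kotlinEncoderFor` and the `Person` usage are hypothetical and stay commented out.

package org.jetbrains.kotlinx.spark.examples

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

// Illustrative only; assumes a Spark Connect server is reachable at sc://localhost,
// like the Main.kt example in this patch.
fun main() {
    val spark = SparkSession
        .builder()
        .remote("sc://localhost")
        .create()

    // Plain DataFrame operations go through the Connect client without the Kotlin Spark API.
    spark.range(1, 6)
        .withColumn("square", col("id").multiply(col("id")))
        .show()

    // Intended end state (hypothetical, does not work yet per the commit subject): the Sparkify
    // compiler plugin would generate an encoder for the @Sparkify Person class from Main.kt,
    // allowing something like:
    // spark.createDataset(listOf(Person("Alice", 30)), kotlinEncoderFor<Person>()).show()

    spark.stop()
}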