diff --git a/.travis.yml b/.travis.yml index 70167f3ede..8501bc1079 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,7 +30,7 @@ branches: language: scala jdk: - - oraclejdk8 + - openjdk8 services: - docker @@ -47,15 +47,9 @@ env: matrix: - BUILD_TYPE=Unit METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=PGSQL - - BUILD_TYPE=Integration - METADATA_REP=ELASTICSEARCH EVENTDATA_REP=PGSQL MODELDATA_REP=S3 - PIO_ELASTICSEARCH_VERSION=1.7.3 - - BUILD_TYPE=Integration - METADATA_REP=ELASTICSEARCH EVENTDATA_REP=ELASTICSEARCH MODELDATA_REP=S3 - PIO_ELASTICSEARCH_VERSION=5.6.9 - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=ELASTICSEARCH MODELDATA_REP=S3 - PIO_ELASTICSEARCH_VERSION=6.4.2 + PIO_ELASTICSEARCH_VERSION=6.8.1 - BUILD_TYPE=Integration METADATA_REP=ELASTICSEARCH EVENTDATA_REP=HBASE MODELDATA_REP=LOCALFS PIO_HBASE_VERSION=1.2.6 @@ -96,11 +90,6 @@ env: PIO_SCALA_VERSION=2.11.12 PIO_SPARK_VERSION=2.2.3 PIO_HADOOP_VERSION=2.7.7 - - BUILD_TYPE=Integration - METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS - PIO_SCALA_VERSION=2.11.12 - PIO_SPARK_VERSION=2.3.3 - PIO_HADOOP_VERSION=2.7.7 - BUILD_TYPE=Integration METADATA_REP=PGSQL EVENTDATA_REP=PGSQL MODELDATA_REP=HDFS PIO_SCALA_VERSION=2.11.12 diff --git a/LICENSE.txt b/LICENSE.txt index 6e02f9bccb..e06a1bcca3 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1708,7 +1708,6 @@ Binary distribution bundles org.scala-lang.modules # scala-parser-combinators_2.11 # 1.0.6 (http://scala-lang.org/) org.scala-lang.modules # scala-parser-combinators_2.11 # 1.1.0 (http://scala-lang.org/) org.scala-lang.modules # scala-xml_2.11 # 1.0.5 (http://scala-lang.org/) - org.scala-lang.modules # scala-xml_2.11 # 1.0.6 (http://scala-lang.org/) which is available under the BSD license (http://www.scala-lang.org/downloads/license.html) diff --git a/PMC.md b/PMC.md index 18050cbe27..40e342db5d 100644 --- a/PMC.md +++ b/PMC.md @@ -27,38 +27,38 @@ http://apache.org/dev/openpgp.html#generate-key on how to generate a strong code signing key. 2. Add your public key to the `KEYS` file at the root of the source code tree. 3. Create a new release branch, with version bumped to the next release version. - * `git checkout -b release/0.14.0` - * Replace all `0.14.0-SNAPSHOT` in the code tree to `0.14.0` - * `git commit -am "Prepare 0.14.0-rc1"` - * `git tag -am "Apache PredictionIO 0.14.0-rc1" v0.14.0-rc1` + * `git checkout -b release/0.15.0` + * Replace all `0.15.0-SNAPSHOT` in the code tree to `0.15.0` + * `git commit -am "Prepare 0.15.0-rc1"` + * `git tag -am "Apache PredictionIO 0.15.0-rc1" v0.15.0-rc1` 4. Push the release branch and tag to the apache git repo. 5. Wait for Travis to pass build on the release branch. 6. Package a clean tarball for staging a release candidate. - * `git archive --format tar v0.14.0-rc1 > - ../apache-predictionio-0.14.0-rc1.tar` - * `cd ..; gzip apache-predictionio-0.14.0-rc1.tar` + * `git archive --format tar v0.15.0-rc1 > + ../apache-predictionio-0.15.0-rc1.tar` + * `cd ..; gzip apache-predictionio-0.15.0-rc1.tar` 7. Generate detached signature for the release candidate. (http://apache.org/dev/release-signing.html#openpgp-ascii-detach-sig) - * `gpg --armor --output apache-predictionio-0.14.0-rc1.tar.gz.asc - --detach-sig apache-predictionio-0.14.0-rc1.tar.gz` + * `gpg --armor --output apache-predictionio-0.15.0-rc1.tar.gz.asc + --detach-sig apache-predictionio-0.15.0-rc1.tar.gz` 8. Generate SHA512 checksums for the release candidate. 
- * `gpg --print-md SHA512 apache-predictionio-0.14.0-rc1.tar.gz > - apache-predictionio-0.14.0-rc1.tar.gz.sha512` + * `gpg --print-md SHA512 apache-predictionio-0.15.0-rc1.tar.gz > + apache-predictionio-0.15.0-rc1.tar.gz.sha512` 9. Run `./make-distribution.sh` and repeat steps 6 to 8 to create binary distribution release. - * `mv PredictionIO-0.14.0.tar.gz apache-predictionio-0.14.0-bin.tar.gz` - * `gpg --armor --output apache-predictionio-0.14.0-bin.tar.gz.asc - --detach-sig apache-predictionio-0.14.0-bin.tar.gz` - * `gpg --print-md SHA512 apache-predictionio-0.14.0-bin.tar.gz > - apache-predictionio-0.14.0-bin.tar.gz.sha512` + * `mv PredictionIO-0.15.0.tar.gz apache-predictionio-0.15.0-bin.tar.gz` + * `gpg --armor --output apache-predictionio-0.15.0-bin.tar.gz.asc + --detach-sig apache-predictionio-0.15.0-bin.tar.gz` + * `gpg --print-md SHA512 apache-predictionio-0.15.0-bin.tar.gz > + apache-predictionio-0.15.0-bin.tar.gz.sha512` 10. If you have not done so, use SVN to checkout https://dist.apache.org/repos/dist/dev/predictionio. This is the area for staging release candidates for voting. * `svn co https://dist.apache.org/repos/dist/dev/predictionio` 11. Create a subdirectory at the SVN staging area. The area should have a `KEYS` file. - * `mkdir apache-predictionio-0.14.0-rc1` - * `cp apache-predictionio-0.14.0-* apache-predictionio-0.14.0-rc1` + * `mkdir apache-predictionio-0.15.0-rc1` + * `cp apache-predictionio-0.15.0-* apache-predictionio-0.15.0-rc1` 12. If you have updated the `KEYS` file, also copy that to the staging area. -13. `svn commit -m "Apache PredictionIO 0.14.0-rc1"` +13. `svn commit -m "Apache PredictionIO 0.15.0-rc1"` 14. Set up credentials with Apache Nexus using the SBT Sonatype plugin. Put this in `~/.sbt/1.0/sonatype.sbt`. @@ -78,26 +78,27 @@ Close the staged repository on Apache Nexus. 16. Send out email for voting on PredictionIO dev mailing list. ``` - Subject: [VOTE] Apache PredictionIO 0.14.0 Release (RC1) + Subject: [VOTE] Apache PredictionIO 0.15.0 Release (RC1) - This is the vote for 0.14.0 of Apache PredictionIO. + This is the vote for 0.15.0 of Apache PredictionIO. The vote will run for at least 72 hours and will close on Apr 7th, 2017. - The release candidate artifacts can be downloaded here: https://dist.apache.org/repos/dist/dev/predictionio/apache-predictionio-0.14.0-rc1/ + The release candidate artifacts can be downloaded here: https://dist.apache.org/repos/dist/dev/predictionio/apache-predictionio-0.15.0-rc1/ Test results of RC1 can be found here: https://travis-ci.org/apache/predictionio/builds/xxx Maven artifacts are built from the release candidate artifacts above, and are provided as convenience for testing with engine templates. The Maven artifacts are provided at the Maven staging repo here: https://repository.apache.org/content/repositories/orgapachepredictionio-nnnn/ - All JIRAs completed for this release are tagged with 'FixVersion = 0.14.0'. You can view them here: https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12320420&version=12337844 + All JIRAs completed for this release are tagged with 'FixVersion = 0.15.0'. You can view them here: https://issues.apache.org/jira/secure/ReleaseNote + .jspa?projectId=12320420&version=12337844 The artifacts have been signed with Key : YOUR_KEY_ID Please vote accordingly: - [ ] +1, accept RC as the official 0.14.0 release - [ ] -1, do not accept RC as the official 0.14.0 release because... 
+ [ ] +1, accept RC as the official 0.15.0 release + [ ] -1, do not accept RC as the official 0.15.0 release because... ``` 17. After the vote has been accepted, update `RELEASE.md`. 18. Create a release tag @@ -106,26 +107,26 @@ Close the staged repository on Apache Nexus. https://dist.apache.org/repos/dist/release/predictionio/. This is the area for staging actual releases. 21. Create a subdirectory at the SVN staging area. The area should have a `KEYS` file. - * `mkdir 0.14.0` + * `mkdir 0.15.0` * Copy the binary distribution from the dev/ tree to the release/ tree * Copy the official release to the release/ tree 22. If you have updated the `KEYS` file, also copy that to the staging area. 23. Remove old releases from the ASF distribution mirrors. (https://www.apache.org/dev/mirrors.html#location) - * `svn delete 0.13.0` -24. `svn commit -m "Apache PredictionIO 0.14.0"` + * `svn delete 0.14.0` +24. `svn commit -m "Apache PredictionIO 0.15.0"` 25. Document breaking changes in https://predictionio.apache.org/resources/upgrade/. 26. Mark the version as released on JIRA. (https://issues.apache.org/jira/projects/PIO?selectedItem=com.atlassian.jira.jira-projects-plugin%3Arelease-page&status=no-filter) 27. Send out an email to the following mailing lists: announce, user, dev. ``` - Subject: [ANNOUNCE] Apache PredictionIO 0.14.0 Release + Subject: [ANNOUNCE] Apache PredictionIO 0.15.0 Release - The Apache PredictionIO team would like to announce the release of Apache PredictionIO 0.14.0. + The Apache PredictionIO team would like to announce the release of Apache PredictionIO 0.15.0. Release notes are here: - https://github.com/apache/predictionio/blob/release/0.14.0/RELEASE.md + https://github.com/apache/predictionio/blob/v0.15.0/RELEASE.md Apache PredictionIO is an open source Machine Learning Server built on top of state-of-the-art open source stack, that enables developers to manage and deploy production-ready predictive services for various kinds of machine learning tasks. @@ -133,9 +134,9 @@ for staging actual releases. https://predictionio.apache.org/ The release artifacts can be downloaded here: - https://www.apache.org/dyn/closer.lua/predictionio/0.14.0/apache-predictionio-0.14.0-bin.tar.gz + https://www.apache.org/dyn/closer.lua/predictionio/0.15.0/apache-predictionio-0.15.0-bin.tar.gz - All JIRAs completed for this release are tagged with 'FixVersion = 0.13.0'; the JIRA release notes can be found here: + All JIRAs completed for this release are tagged with 'FixVersion = 0.15.0'; the JIRA release notes can be found here: https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12320420&version=12337844 Thanks! 
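
For reference, the release-candidate packaging, signing, and checksum commands from the updated PMC.md (steps 6 through 8) can be run end to end as below. This is a minimal sketch only, assuming the `v0.15.0-rc1` tag already exists locally and a default GPG signing key is configured; it is not part of the patch itself.

```bash
# Sketch consolidating PMC.md steps 6-8 for the 0.15.0-rc1 source candidate.
# Assumes the v0.15.0-rc1 tag exists locally and a GPG signing key is set up.
git archive --format tar v0.15.0-rc1 > ../apache-predictionio-0.15.0-rc1.tar
cd ..
gzip apache-predictionio-0.15.0-rc1.tar
# Detached ASCII-armored signature (step 7)
gpg --armor --output apache-predictionio-0.15.0-rc1.tar.gz.asc \
    --detach-sig apache-predictionio-0.15.0-rc1.tar.gz
# SHA512 checksum (step 8)
gpg --print-md SHA512 apache-predictionio-0.15.0-rc1.tar.gz > \
    apache-predictionio-0.15.0-rc1.tar.gz.sha512
```
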
diff --git a/build.sbt b/build.sbt index bbf6c689d3..f5515dbbf1 100644 --- a/build.sbt +++ b/build.sbt @@ -16,9 +16,28 @@ */ import PIOBuild._ +lazy val scalaSparkDepsVersion = Map( + "2.11" -> Map( + "2.0" -> Map( + "akka" -> "2.5.16", + "hadoop" -> "2.7.7", + "json4s" -> "3.2.11"), + "2.1" -> Map( + "akka" -> "2.5.17", + "hadoop" -> "2.7.7", + "json4s" -> "3.2.11"), + "2.2" -> Map( + "akka" -> "2.5.17", + "hadoop" -> "2.7.7", + "json4s" -> "3.2.11"), + "2.3" -> Map( + "akka" -> "2.5.17", + "hadoop" -> "2.7.7", + "json4s" -> "3.2.11"))) + name := "apache-predictionio-parent" -version in ThisBuild := "0.14.0" +version in ThisBuild := "0.15.0-SNAPSHOT" organization in ThisBuild := "org.apache.predictionio" @@ -26,7 +45,7 @@ scalaVersion in ThisBuild := sys.props.getOrElse("scala.version", "2.11.12") scalaBinaryVersion in ThisBuild := binaryVersion(scalaVersion.value) -crossScalaVersions in ThisBuild := Seq("2.11.12") +crossScalaVersions in ThisBuild := Seq(scalaVersion.value) scalacOptions in ThisBuild ++= Seq("-deprecation", "-unchecked", "-feature") @@ -45,9 +64,7 @@ hadoopVersion in ThisBuild := sys.props.getOrElse("hadoop.version", "2.7.7") akkaVersion in ThisBuild := sys.props.getOrElse("akka.version", "2.5.17") -lazy val es = sys.props.getOrElse("elasticsearch.version", "5.6.9") - -elasticsearchVersion in ThisBuild := es +elasticsearchVersion in ThisBuild := sys.props.getOrElse("elasticsearch.version", "6.8.1") hbaseVersion in ThisBuild := sys.props.getOrElse("hbase.version", "1.2.6") @@ -72,10 +89,6 @@ val commonTestSettings = Seq( "org.postgresql" % "postgresql" % "9.4-1204-jdbc41" % "test", "org.scalikejdbc" %% "scalikejdbc" % "3.1.0" % "test")) -val dataElasticsearch1 = (project in file("storage/elasticsearch1")). - settings(commonSettings: _*). - enablePlugins(GenJavadocPlugin) - val dataElasticsearch = (project in file("storage/elasticsearch")). settings(commonSettings: _*) @@ -145,19 +158,17 @@ val tools = (project in file("tools")). enablePlugins(GenJavadocPlugin). enablePlugins(SbtTwirl) -val dataEs = if (majorVersion(es) == 1) dataElasticsearch1 else dataElasticsearch - -val storageSubprojects = Seq( - dataEs, +val storageProjectReference = Seq( + dataElasticsearch, dataHbase, dataHdfs, dataJdbc, dataLocalfs, - dataS3) + dataS3) map Project.projectToRef val storage = (project in file("storage")) .settings(skip in publish := true) - .aggregate(storageSubprojects map Project.projectToRef: _*) + .aggregate(storageProjectReference: _*) .disablePlugins(sbtassembly.AssemblyPlugin) val assembly = (project in file("assembly")). @@ -167,8 +178,8 @@ val root = (project in file(".")). settings(commonSettings: _*). enablePlugins(ScalaUnidocPlugin). settings( - unidocProjectFilter in (ScalaUnidoc, unidoc) := inAnyProject -- inProjects(dataElasticsearch, dataElasticsearch1), - unidocProjectFilter in (JavaUnidoc, unidoc) := inAnyProject -- inProjects(dataElasticsearch, dataElasticsearch1), + unidocProjectFilter in (ScalaUnidoc, unidoc) := inAnyProject -- inProjects(storageProjectReference: _*), + unidocProjectFilter in (JavaUnidoc, unidoc) := inAnyProject -- inProjects(storageProjectReference: _*), scalacOptions in (ScalaUnidoc, unidoc) ++= Seq( "-groups", "-skip-packages", @@ -181,11 +192,6 @@ val root = (project in file(".")). 
"org.apache.predictionio.controller.java", "org.apache.predictionio.data.api", "org.apache.predictionio.data.storage.*", - "org.apache.predictionio.data.storage.hdfs", - "org.apache.predictionio.data.storage.jdbc", - "org.apache.predictionio.data.storage.localfs", - "org.apache.predictionio.data.storage.s3", - "org.apache.predictionio.data.storage.hbase", "org.apache.predictionio.data.view", "org.apache.predictionio.data.webhooks", "org.apache.predictionio.tools", diff --git a/conf/pio-env.sh.template b/conf/pio-env.sh.template index 3cd2415f9b..bb18b9872f 100644 --- a/conf/pio-env.sh.template +++ b/conf/pio-env.sh.template @@ -24,7 +24,6 @@ # you need to change these to fit your site. # SPARK_HOME: Apache Spark is a hard dependency and must be configured. -# SPARK_HOME=$PIO_HOME/vendors/spark-2.0.2-bin-hadoop2.7 SPARK_HOME=$PIO_HOME/vendors/spark-2.1.1-bin-hadoop2.6 POSTGRES_JDBC_DRIVER=$PIO_HOME/lib/postgresql-42.0.0.jar @@ -40,7 +39,7 @@ MYSQL_JDBC_DRIVER=$PIO_HOME/lib/mysql-connector-java-5.1.41.jar # HBASE_CONF_DIR: You must configure this if you intend to run PredictionIO # with HBase on a remote cluster. -# HBASE_CONF_DIR=$PIO_HOME/vendors/hbase-1.0.0/conf +# HBASE_CONF_DIR=$PIO_HOME/vendors/hbase-1.2.6/conf # Filesystem paths where PredictionIO uses as block storage. PIO_FS_BASEDIR=$HOME/.pio_store @@ -89,16 +88,10 @@ PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio # PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=localhost # PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200 # PIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES=http -# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$PIO_HOME/vendors/elasticsearch-5.6.9 +# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$PIO_HOME/vendors/elasticsearch-6.8.1 # Optional basic HTTP auth # PIO_STORAGE_SOURCES_ELASTICSEARCH_USERNAME=my-name # PIO_STORAGE_SOURCES_ELASTICSEARCH_PASSWORD=my-secret -# Elasticsearch 1.x Example -# PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch -# PIO_STORAGE_SOURCES_ELASTICSEARCH_CLUSTERNAME= -# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=localhost -# PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9300 -# PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$PIO_HOME/vendors/elasticsearch-1.7.6 # Local File System Example # PIO_STORAGE_SOURCES_LOCALFS_TYPE=localfs @@ -106,7 +99,7 @@ PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio # HBase Example # PIO_STORAGE_SOURCES_HBASE_TYPE=hbase -# PIO_STORAGE_SOURCES_HBASE_HOME=$PIO_HOME/vendors/hbase-1.0.0 +# PIO_STORAGE_SOURCES_HBASE_HOME=$PIO_HOME/vendors/hbase-1.2.6 # AWS S3 Example # PIO_STORAGE_SOURCES_S3_TYPE=s3 diff --git a/conf/pio-vendors.sh b/conf/pio-vendors.sh index d68ff7e516..a29faab008 100644 --- a/conf/pio-vendors.sh +++ b/conf/pio-vendors.sh @@ -32,22 +32,15 @@ if [ -z "$PIO_HADOOP_VERSION" ]; then fi if [ -z "$PIO_ELASTICSEARCH_VERSION" ]; then - PIO_ELASTICSEARCH_VERSION="5.6.9" + PIO_ELASTICSEARCH_VERSION="6.8.1" fi if [ -z "$PIO_HBASE_VERSION" ]; then PIO_HBASE_VERSION="1.2.6" fi -ES_MAJOR=`echo $PIO_ELASTICSEARCH_VERSION | awk -F. '{print $1}'` - -if [ "$ES_MAJOR" = "1" ]; then - export ES_IMAGE="elasticsearch" - export ES_TAG="1" -else - export ES_IMAGE="docker.elastic.co/elasticsearch/elasticsearch" - export ES_TAG="$PIO_ELASTICSEARCH_VERSION" -fi +export ES_IMAGE="docker.elastic.co/elasticsearch/elasticsearch" +export ES_TAG="$PIO_ELASTICSEARCH_VERSION" HBASE_MAJOR=`echo $PIO_HBASE_VERSION | awk -F. '{print $1 "." 
$2}'` export HBASE_TAG="$HBASE_MAJOR" @@ -60,6 +53,3 @@ SPARK_DIR=spark-${PIO_SPARK_VERSION}-bin-hadoop${HADOOP_MAJOR} SPARK_ARCHIVE=${SPARK_DIR}.tgz SPARK_DOWNLOAD_MIRROR=https://www.apache.org/dyn/closer.lua\?action=download\&filename=spark/spark-${PIO_SPARK_VERSION}/${SPARK_ARCHIVE} SPARK_DOWNLOAD_ARCHIVE=https://archive.apache.org/dist/spark/spark-${PIO_SPARK_VERSION}/${SPARK_ARCHIVE} -# ELASTICSEARCH_DOWNLOAD -# 5.x https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-${PIO_ELASTICSEARCH_VERSION}.tar.gz -# 1.x https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${PIO_ELASTICSEARCH_VERSION}.tar.gz diff --git a/core/src/main/scala/org/apache/predictionio/controller/IdentityPreparator.scala b/core/src/main/scala/org/apache/predictionio/controller/IdentityPreparator.scala index 82561424c4..b7a7b8f568 100644 --- a/core/src/main/scala/org/apache/predictionio/controller/IdentityPreparator.scala +++ b/core/src/main/scala/org/apache/predictionio/controller/IdentityPreparator.scala @@ -30,7 +30,7 @@ import org.apache.spark.SparkContext * @group Preparator */ class IdentityPreparator[TD] extends BasePreparator[TD, TD] { - def prepareBase(sc: SparkContext, td: TD): TD = td + override def prepareBase(sc: SparkContext, td: TD): TD = td } /** Companion object of [[IdentityPreparator]] that conveniently returns an diff --git a/core/src/main/scala/org/apache/predictionio/controller/LAlgorithm.scala b/core/src/main/scala/org/apache/predictionio/controller/LAlgorithm.scala index 27d1d14e96..7a7637cfcc 100644 --- a/core/src/main/scala/org/apache/predictionio/controller/LAlgorithm.scala +++ b/core/src/main/scala/org/apache/predictionio/controller/LAlgorithm.scala @@ -45,7 +45,7 @@ import scala.reflect._ abstract class LAlgorithm[PD, M : ClassTag, Q, P] extends BaseAlgorithm[RDD[PD], RDD[M], Q, P] { - def trainBase(sc: SparkContext, pd: RDD[PD]): RDD[M] = pd.map(train) + override def trainBase(sc: SparkContext, pd: RDD[PD]): RDD[M] = pd.map(train) /** Implement this method to produce a model from prepared data. * @@ -54,7 +54,7 @@ abstract class LAlgorithm[PD, M : ClassTag, Q, P] */ def train(pd: PD): M - def batchPredictBase(sc: SparkContext, bm: Any, qs: RDD[(Long, Q)]) + override def batchPredictBase(sc: SparkContext, bm: Any, qs: RDD[(Long, Q)]) : RDD[(Long, P)] = { val mRDD = bm.asInstanceOf[RDD[M]] batchPredict(mRDD, qs) @@ -76,7 +76,7 @@ abstract class LAlgorithm[PD, M : ClassTag, Q, P] } } - def predictBase(localBaseModel: Any, q: Q): P = { + override def predictBase(localBaseModel: Any, q: Q): P = { predict(localBaseModel.asInstanceOf[M], q) } diff --git a/core/src/main/scala/org/apache/predictionio/controller/LAverageServing.scala b/core/src/main/scala/org/apache/predictionio/controller/LAverageServing.scala index b51b9d12ec..1427fe66cd 100644 --- a/core/src/main/scala/org/apache/predictionio/controller/LAverageServing.scala +++ b/core/src/main/scala/org/apache/predictionio/controller/LAverageServing.scala @@ -27,7 +27,7 @@ import org.apache.predictionio.core.BaseAlgorithm */ class LAverageServing[Q] extends LServing[Q, Double] { /** Returns the average of all algorithms' predictions. 
*/ - def serve(query: Q, predictions: Seq[Double]): Double = { + override def serve(query: Q, predictions: Seq[Double]): Double = { predictions.sum / predictions.length } } diff --git a/core/src/main/scala/org/apache/predictionio/controller/LDataSource.scala b/core/src/main/scala/org/apache/predictionio/controller/LDataSource.scala index fc08e60d74..13b92b94bf 100644 --- a/core/src/main/scala/org/apache/predictionio/controller/LDataSource.scala +++ b/core/src/main/scala/org/apache/predictionio/controller/LDataSource.scala @@ -38,14 +38,14 @@ import scala.reflect._ abstract class LDataSource[TD: ClassTag, EI, Q, A] extends BaseDataSource[RDD[TD], EI, Q, A] { - def readTrainingBase(sc: SparkContext): RDD[TD] = { + override def readTrainingBase(sc: SparkContext): RDD[TD] = { sc.parallelize(Seq(None)).map(_ => readTraining()) } /** Implement this method to only return training data from a data source */ def readTraining(): TD - def readEvalBase(sc: SparkContext): Seq[(RDD[TD], EI, RDD[(Q, A)])] = { + override def readEvalBase(sc: SparkContext): Seq[(RDD[TD], EI, RDD[(Q, A)])] = { val localEvalData: Seq[(TD, EI, Seq[(Q, A)])] = readEval() localEvalData.map { case (td, ei, qaSeq) => { diff --git a/core/src/main/scala/org/apache/predictionio/controller/LFirstServing.scala b/core/src/main/scala/org/apache/predictionio/controller/LFirstServing.scala index 01e3840a8e..e192ea73d8 100644 --- a/core/src/main/scala/org/apache/predictionio/controller/LFirstServing.scala +++ b/core/src/main/scala/org/apache/predictionio/controller/LFirstServing.scala @@ -27,7 +27,7 @@ import org.apache.predictionio.core.BaseAlgorithm */ class LFirstServing[Q, P] extends LServing[Q, P] { /** Returns the first algorithm's prediction. */ - def serve(query: Q, predictions: Seq[P]): P = predictions.head + override def serve(query: Q, predictions: Seq[P]): P = predictions.head } /** A concrete implementation of [[LServing]] returning the first algorithm's diff --git a/core/src/main/scala/org/apache/predictionio/controller/LPreparator.scala b/core/src/main/scala/org/apache/predictionio/controller/LPreparator.scala index d29a5a3b71..12d8d2d4cf 100644 --- a/core/src/main/scala/org/apache/predictionio/controller/LPreparator.scala +++ b/core/src/main/scala/org/apache/predictionio/controller/LPreparator.scala @@ -36,7 +36,7 @@ import scala.reflect._ abstract class LPreparator[TD, PD : ClassTag] extends BasePreparator[RDD[TD], RDD[PD]] { - def prepareBase(sc: SparkContext, rddTd: RDD[TD]): RDD[PD] = { + override def prepareBase(sc: SparkContext, rddTd: RDD[TD]): RDD[PD] = { rddTd.map(prepare) } diff --git a/core/src/main/scala/org/apache/predictionio/controller/LServing.scala b/core/src/main/scala/org/apache/predictionio/controller/LServing.scala index 4b123fe247..9bd1c99d81 100644 --- a/core/src/main/scala/org/apache/predictionio/controller/LServing.scala +++ b/core/src/main/scala/org/apache/predictionio/controller/LServing.scala @@ -28,7 +28,7 @@ import org.apache.predictionio.core.BaseServing * @group Serving */ abstract class LServing[Q, P] extends BaseServing[Q, P] { - def supplementBase(q: Q): Q = supplement(q) + override def supplementBase(q: Q): Q = supplement(q) /** :: Experimental :: * Implement this method to supplement the query before sending it to @@ -40,7 +40,7 @@ abstract class LServing[Q, P] extends BaseServing[Q, P] { @Experimental def supplement(q: Q): Q = q - def serveBase(q: Q, ps: Seq[P]): P = { + override def serveBase(q: Q, ps: Seq[P]): P = { serve(q, ps) } diff --git 
a/core/src/main/scala/org/apache/predictionio/controller/LocalFileSystemPersistentModel.scala b/core/src/main/scala/org/apache/predictionio/controller/LocalFileSystemPersistentModel.scala index deb2db47bc..18980bdc43 100644 --- a/core/src/main/scala/org/apache/predictionio/controller/LocalFileSystemPersistentModel.scala +++ b/core/src/main/scala/org/apache/predictionio/controller/LocalFileSystemPersistentModel.scala @@ -41,7 +41,7 @@ import org.apache.spark.SparkContext * @group Algorithm */ trait LocalFileSystemPersistentModel[AP <: Params] extends PersistentModel[AP] { - def save(id: String, params: AP, sc: SparkContext): Boolean = { + override def save(id: String, params: AP, sc: SparkContext): Boolean = { Utils.save(id, this) true } @@ -59,7 +59,7 @@ trait LocalFileSystemPersistentModel[AP <: Params] extends PersistentModel[AP] { */ trait LocalFileSystemPersistentModelLoader[AP <: Params, M] extends PersistentModelLoader[AP, M] { - def apply(id: String, params: AP, sc: Option[SparkContext]): M = { + override def apply(id: String, params: AP, sc: Option[SparkContext]): M = { Utils.load(id).asInstanceOf[M] } } diff --git a/core/src/main/scala/org/apache/predictionio/controller/Metric.scala b/core/src/main/scala/org/apache/predictionio/controller/Metric.scala index bc29092d35..853135c17c 100644 --- a/core/src/main/scala/org/apache/predictionio/controller/Metric.scala +++ b/core/src/main/scala/org/apache/predictionio/controller/Metric.scala @@ -103,9 +103,9 @@ abstract class AverageMetric[EI, Q, P, A] /** Implement this method to return a score that will be used for averaging * across all QPA tuples. */ - def calculate(q: Q, p: P, a: A): Double + override def calculate(q: Q, p: P, a: A): Double - def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])]) + override def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])]) : Double = { calculateStats(sc, evalDataSet).mean } @@ -128,9 +128,9 @@ abstract class OptionAverageMetric[EI, Q, P, A] /** Implement this method to return a score that will be used for averaging * across all QPA tuples. */ - def calculate(q: Q, p: P, a: A): Option[Double] + override def calculate(q: Q, p: P, a: A): Option[Double] - def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])]) + override def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])]) : Double = { calculateStats(sc, evalDataSet).mean } @@ -156,9 +156,9 @@ abstract class StdevMetric[EI, Q, P, A] * the stdev * across all QPA tuples. */ - def calculate(q: Q, p: P, a: A): Double + override def calculate(q: Q, p: P, a: A): Double - def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])]) + override def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])]) : Double = { calculateStats(sc, evalDataSet).stdev } @@ -184,9 +184,9 @@ abstract class OptionStdevMetric[EI, Q, P, A] * the stdev * across all QPA tuples. */ - def calculate(q: Q, p: P, a: A): Option[Double] + override def calculate(q: Q, p: P, a: A): Option[Double] - def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])]) + override def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])]) : Double = { calculateStats(sc, evalDataSet).stdev } @@ -208,9 +208,9 @@ abstract class SumMetric[EI, Q, P, A, R: ClassTag](implicit num: Numeric[R]) /** Implement this method to return a score that will be used for summing * across all QPA tuples. 
*/ - def calculate(q: Q, p: P, a: A): R + override def calculate(q: Q, p: P, a: A): R - def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])]) + override def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])]) : R = { val union: RDD[R] = sc.union( evalDataSet.map { case (_, qpaRDD) => @@ -232,7 +232,7 @@ abstract class SumMetric[EI, Q, P, A, R: ClassTag](implicit num: Numeric[R]) * @group Evaluation */ class ZeroMetric[EI, Q, P, A] extends Metric[EI, Q, P, A, Double]() { - def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])]): Double = 0.0 + override def calculate(sc: SparkContext, evalDataSet: Seq[(EI, RDD[(Q, P, A)])]): Double = 0.0 } /** Companion object of [[ZeroMetric]] diff --git a/core/src/main/scala/org/apache/predictionio/controller/MetricEvaluator.scala b/core/src/main/scala/org/apache/predictionio/controller/MetricEvaluator.scala index fc5ec15310..17f09091d9 100644 --- a/core/src/main/scala/org/apache/predictionio/controller/MetricEvaluator.scala +++ b/core/src/main/scala/org/apache/predictionio/controller/MetricEvaluator.scala @@ -215,7 +215,7 @@ class MetricEvaluator[EI, Q, P, A, R] ( writer.close() } - def evaluateBase( + override def evaluateBase( sc: SparkContext, evaluation: Evaluation, engineEvalDataSet: Seq[(EngineParams, Seq[(EI, RDD[(Q, P, A)])])], diff --git a/core/src/main/scala/org/apache/predictionio/controller/P2LAlgorithm.scala b/core/src/main/scala/org/apache/predictionio/controller/P2LAlgorithm.scala index c617d2c50a..1f59ecbff8 100644 --- a/core/src/main/scala/org/apache/predictionio/controller/P2LAlgorithm.scala +++ b/core/src/main/scala/org/apache/predictionio/controller/P2LAlgorithm.scala @@ -46,7 +46,7 @@ import scala.reflect._ abstract class P2LAlgorithm[PD, M: ClassTag, Q: ClassTag, P] extends BaseAlgorithm[PD, M, Q, P] { - def trainBase(sc: SparkContext, pd: PD): M = train(sc, pd) + override def trainBase(sc: SparkContext, pd: PD): M = train(sc, pd) /** Implement this method to produce a model from prepared data. * @@ -55,7 +55,7 @@ abstract class P2LAlgorithm[PD, M: ClassTag, Q: ClassTag, P] */ def train(sc: SparkContext, pd: PD): M - def batchPredictBase(sc: SparkContext, bm: Any, qs: RDD[(Long, Q)]) + override def batchPredictBase(sc: SparkContext, bm: Any, qs: RDD[(Long, Q)]) : RDD[(Long, P)] = batchPredict(bm.asInstanceOf[M], qs) /** This is a default implementation to perform batch prediction. Override @@ -70,7 +70,7 @@ abstract class P2LAlgorithm[PD, M: ClassTag, Q: ClassTag, P] qs.mapValues { q => predict(m, q) } } - def predictBase(bm: Any, q: Q): P = predict(bm.asInstanceOf[M], q) + override def predictBase(bm: Any, q: Q): P = predict(bm.asInstanceOf[M], q) /** Implement this method to produce a prediction from a query and trained * model. diff --git a/core/src/main/scala/org/apache/predictionio/controller/PAlgorithm.scala b/core/src/main/scala/org/apache/predictionio/controller/PAlgorithm.scala index 55f8363fdb..76949564c4 100644 --- a/core/src/main/scala/org/apache/predictionio/controller/PAlgorithm.scala +++ b/core/src/main/scala/org/apache/predictionio/controller/PAlgorithm.scala @@ -47,7 +47,7 @@ import org.apache.spark.rdd.RDD abstract class PAlgorithm[PD, M, Q, P] extends BaseAlgorithm[PD, M, Q, P] { - def trainBase(sc: SparkContext, pd: PD): M = train(sc, pd) + override def trainBase(sc: SparkContext, pd: PD): M = train(sc, pd) /** Implement this method to produce a model from prepared data. 
* @@ -56,7 +56,7 @@ abstract class PAlgorithm[PD, M, Q, P] */ def train(sc: SparkContext, pd: PD): M - def batchPredictBase(sc: SparkContext, bm: Any, qs: RDD[(Long, Q)]) + override def batchPredictBase(sc: SparkContext, bm: Any, qs: RDD[(Long, Q)]) : RDD[(Long, P)] = batchPredict(bm.asInstanceOf[M], qs) /** To provide evaluation feature, one must override and implement this method @@ -72,7 +72,7 @@ abstract class PAlgorithm[PD, M, Q, P] def batchPredict(m: M, qs: RDD[(Long, Q)]): RDD[(Long, P)] = throw new NotImplementedError("batchPredict not implemented") - def predictBase(baseModel: Any, query: Q): P = { + override def predictBase(baseModel: Any, query: Q): P = { predict(baseModel.asInstanceOf[M], query) } diff --git a/core/src/main/scala/org/apache/predictionio/controller/PDataSource.scala b/core/src/main/scala/org/apache/predictionio/controller/PDataSource.scala index cd9b853996..184fa85342 100644 --- a/core/src/main/scala/org/apache/predictionio/controller/PDataSource.scala +++ b/core/src/main/scala/org/apache/predictionio/controller/PDataSource.scala @@ -37,12 +37,12 @@ import org.apache.spark.rdd.RDD abstract class PDataSource[TD, EI, Q, A] extends BaseDataSource[TD, EI, Q, A] { - def readTrainingBase(sc: SparkContext): TD = readTraining(sc) + override def readTrainingBase(sc: SparkContext): TD = readTraining(sc) /** Implement this method to only return training data from a data source */ def readTraining(sc: SparkContext): TD - def readEvalBase(sc: SparkContext): Seq[(TD, EI, RDD[(Q, A)])] = readEval(sc) + override def readEvalBase(sc: SparkContext): Seq[(TD, EI, RDD[(Q, A)])] = readEval(sc) /** To provide evaluation feature for your engine, your must override this * method to return data for evaluation from a data source. Returned data can diff --git a/core/src/main/scala/org/apache/predictionio/controller/PPreparator.scala b/core/src/main/scala/org/apache/predictionio/controller/PPreparator.scala index ce445b8b14..cec959187c 100644 --- a/core/src/main/scala/org/apache/predictionio/controller/PPreparator.scala +++ b/core/src/main/scala/org/apache/predictionio/controller/PPreparator.scala @@ -33,7 +33,7 @@ import org.apache.spark.SparkContext abstract class PPreparator[TD, PD] extends BasePreparator[TD, PD] { - def prepareBase(sc: SparkContext, td: TD): PD = { + override def prepareBase(sc: SparkContext, td: TD): PD = { prepare(sc, td) } diff --git a/core/src/main/scala/org/apache/predictionio/workflow/CleanupFunctions.scala b/core/src/main/scala/org/apache/predictionio/workflow/CleanupFunctions.scala index bdd8b01d48..7312555db9 100644 --- a/core/src/main/scala/org/apache/predictionio/workflow/CleanupFunctions.scala +++ b/core/src/main/scala/org/apache/predictionio/workflow/CleanupFunctions.scala @@ -37,7 +37,7 @@ object CleanupFunctions { * CleanupFunctions.add { MyStorageClass.close } * }}} * - * @param anonymous function containing cleanup code. + * @param f function containing cleanup code. */ def add(f: () => Unit): Seq[() => Unit] = { functions = functions :+ f @@ -56,8 +56,6 @@ object CleanupFunctions { * CleanupFunctions.run() * } * }}} - * - * @param anonymous function containing cleanup code. 
*/ def run(): Unit = { functions.foreach { f => f() } diff --git a/core/src/main/scala/org/apache/predictionio/workflow/EngineServerPluginContext.scala b/core/src/main/scala/org/apache/predictionio/workflow/EngineServerPluginContext.scala index 011cd95c98..1d2eaaa7d9 100644 --- a/core/src/main/scala/org/apache/predictionio/workflow/EngineServerPluginContext.scala +++ b/core/src/main/scala/org/apache/predictionio/workflow/EngineServerPluginContext.scala @@ -80,8 +80,9 @@ object EngineServerPluginContext extends Logging { private def stringFromFile(filePath: String): Option[String] = { try { - val fs = FileSystem.get(new Configuration()) - val path = new Path(new URI(filePath)) + val uri = new URI(filePath) + val fs = FileSystem.get(uri, new Configuration()) + val path = new Path(uri) if (fs.exists(path)) { Some(new String(ByteStreams.toByteArray(fs.open(path)).map(_.toChar))) } else { diff --git a/core/src/main/scala/org/apache/predictionio/workflow/EngineServerPluginsActor.scala b/core/src/main/scala/org/apache/predictionio/workflow/EngineServerPluginsActor.scala index 0f052ab1f7..b31377362a 100644 --- a/core/src/main/scala/org/apache/predictionio/workflow/EngineServerPluginsActor.scala +++ b/core/src/main/scala/org/apache/predictionio/workflow/EngineServerPluginsActor.scala @@ -29,7 +29,7 @@ class PluginsActor(engineVariant: String) extends Actor { val pluginContext = EngineServerPluginContext(log, engineVariant) - def receive: PartialFunction[Any, Unit] = { + override def receive: PartialFunction[Any, Unit] = { case (ei: EngineInstance, q: JValue, p: JValue) => pluginContext.outputSniffers.values.foreach(_.process(ei, q, p, pluginContext)) case h: PluginsActor.HandleREST => diff --git a/core/src/main/scala/org/apache/predictionio/workflow/FakeWorkflow.scala b/core/src/main/scala/org/apache/predictionio/workflow/FakeWorkflow.scala index 8e4db512b3..f9e9a1a117 100644 --- a/core/src/main/scala/org/apache/predictionio/workflow/FakeWorkflow.scala +++ b/core/src/main/scala/org/apache/predictionio/workflow/FakeWorkflow.scala @@ -34,7 +34,7 @@ private[predictionio] class FakeEngine extends BaseEngine[EmptyParams, EmptyParams, EmptyParams, EmptyParams] { @transient lazy val logger = Logger[this.type] - def train( + override def train( sc: SparkContext, engineParams: EngineParams, engineInstanceId: String, @@ -42,7 +42,7 @@ extends BaseEngine[EmptyParams, EmptyParams, EmptyParams, EmptyParams] { throw new StopAfterReadInterruption() } - def eval( + override def eval( sc: SparkContext, engineParams: EngineParams, params: WorkflowParams) @@ -56,7 +56,7 @@ private[predictionio] class FakeRunner(f: (SparkContext => Unit)) extends BaseEvaluator[EmptyParams, EmptyParams, EmptyParams, EmptyParams, FakeEvalResult] { @transient private lazy val logger = Logger[this.type] - def evaluateBase( + override def evaluateBase( sc: SparkContext, evaluation: Evaluation, engineEvalDataSet: diff --git a/data/src/main/scala/org/apache/predictionio/data/store/python/PPythonEventStore.scala b/data/src/main/scala/org/apache/predictionio/data/store/python/PPythonEventStore.scala index 1d03634e12..be269a97db 100644 --- a/data/src/main/scala/org/apache/predictionio/data/store/python/PPythonEventStore.scala +++ b/data/src/main/scala/org/apache/predictionio/data/store/python/PPythonEventStore.scala @@ -82,8 +82,17 @@ object PPythonEventStore { Option(entityType), Option(entityId), Option(eventNames), - Option(Option(targetEntityType)), - Option(Option(targetEntityId)))(spark.sparkContext).map { e => + targetEntityType match { 
+ case null => None + case "" => Option(None) + case _ => Option(Option(targetEntityType)) + }, + targetEntityId match { + case null => None + case "" => Option(None) + case _ => Option(Option(targetEntityId)) + } + )(spark.sparkContext).map { e => ( e.eventId, e.event, diff --git a/docker/charts/README.md b/docker/charts/README.md new file mode 100644 index 0000000000..2590d7a2c9 --- /dev/null +++ b/docker/charts/README.md @@ -0,0 +1,59 @@ + + +Helm Charts for Apache PredictionIO +============================ + +## Overview + +Helm Charts are packages of pre-configured Kubernetes resources. +Using charts, you can install and manage PredictionIO in the Kubernetes. + +## Usage + +### Install PredictionIO with PostgreSQL + +To install PostgreSQL and PredictionIO, run `helm install` command: + +``` +helm install --name my-postgresql stable/postgresql -f postgresql.yaml +helm install --name my-pio ./predictionio -f predictionio_postgresql.yaml +``` + +`postgresql.yaml` and `predictionio_postgresql.yaml` are configuration files for charts. +To access Jupyter for PredictionIO, run `kubectl port-forward` and then open `http://localhost:8888/`. + +``` +export POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=predictionio,app.kubernetes.io/instance=my-pio" -o jsonpath="{.items[0].metadata.name}") +kubectl port-forward $POD_NAME 8888:8888 +``` + + +### Install Spark Cluster + +To install Spark cluster, run the following command: + +``` +helm install --name my-spark ./spark +``` + +To train a model, run `pio train` as below: + +``` +pio train -- --master spark://my-spark-master:7077 +``` + diff --git a/docker/charts/postgresql.yaml b/docker/charts/postgresql.yaml new file mode 100644 index 0000000000..6d4e38389b --- /dev/null +++ b/docker/charts/postgresql.yaml @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +postgresqlUsername: pio +postgresqlPassword: pio +postgresqlDatabase: pio + +# for testing +persistence: + enabled: false diff --git a/docker/charts/predictionio/.helmignore b/docker/charts/predictionio/.helmignore new file mode 100644 index 0000000000..4a332ba309 --- /dev/null +++ b/docker/charts/predictionio/.helmignore @@ -0,0 +1,38 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/docker/charts/predictionio/Chart.yaml b/docker/charts/predictionio/Chart.yaml new file mode 100644 index 0000000000..7f97bc8a67 --- /dev/null +++ b/docker/charts/predictionio/Chart.yaml @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +name: predictionio +version: 0.1.0 +appVersion: 0.13.0 +description: Machine learning server +home: http://predictionio.apache.org +icon: http://predictionio.apache.org/images/logos/logo-ee2b9bb3.png +sources: + - https://github.com/apache/predictionio +maintainers: + - name: Shinsuke Sugaya + email: shinsuke@apache.org diff --git a/docker/charts/predictionio/templates/NOTES.txt b/docker/charts/predictionio/templates/NOTES.txt new file mode 100644 index 0000000000..9a2e414320 --- /dev/null +++ b/docker/charts/predictionio/templates/NOTES.txt @@ -0,0 +1,31 @@ +{{/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */}} +1. Get the application URL by running these commands: +{{- if contains "NodePort" .Values.pio.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "predictionio.fullname" . 
}}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.pio.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get svc -w {{ include "predictionio.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "predictionio.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + echo http://$SERVICE_IP:{{ .Values.pio.service.port }} +{{- else if contains "ClusterIP" .Values.pio.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "predictionio.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:8888 to use your application" + kubectl port-forward $POD_NAME 8888:8888 +{{- end }} diff --git a/docker/charts/predictionio/templates/_helpers.tpl b/docker/charts/predictionio/templates/_helpers.tpl new file mode 100644 index 0000000000..57f345dad1 --- /dev/null +++ b/docker/charts/predictionio/templates/_helpers.tpl @@ -0,0 +1,36 @@ +{{/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */}} +{{- define "predictionio.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "predictionio.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{- define "predictionio.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} diff --git a/docker/charts/predictionio/templates/pio-deployment.yaml b/docker/charts/predictionio/templates/pio-deployment.yaml new file mode 100644 index 0000000000..94ca1b6f25 --- /dev/null +++ b/docker/charts/predictionio/templates/pio-deployment.yaml @@ -0,0 +1,75 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + name: {{ include "predictionio.fullname" . }} + labels: + app.kubernetes.io/name: {{ include "predictionio.name" . }} + helm.sh/chart: {{ include "predictionio.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +spec: + replicas: {{ .Values.pio.replicas }} + selector: + matchLabels: + app.kubernetes.io/name: {{ include "predictionio.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + template: + metadata: + labels: + app.kubernetes.io/name: {{ include "predictionio.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + spec: + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.pio.image.repository }}:{{ .Values.pio.image.tag }}" + imagePullPolicy: {{ .Values.pio.image.pullPolicy }} + env: +{{ toYaml .Values.pio.env | indent 12 }} + ports: + - name: event + containerPort: 7070 + protocol: TCP + - name: predict + containerPort: 8000 + protocol: TCP + - name: jupyter + containerPort: 8888 + protocol: TCP + livenessProbe: + httpGet: + path: / + port: 7070 + readinessProbe: + httpGet: + path: / + port: 7070 + resources: +{{ toYaml .Values.pio.resources | indent 12 }} + {{- with .Values.pio.nodeSelector }} + nodeSelector: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.pio.affinity }} + affinity: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.pio.tolerations }} + tolerations: +{{ toYaml . | indent 8 }} + {{- end }} diff --git a/docker/charts/predictionio/templates/pio-service.yaml b/docker/charts/predictionio/templates/pio-service.yaml new file mode 100644 index 0000000000..d4f813051b --- /dev/null +++ b/docker/charts/predictionio/templates/pio-service.yaml @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: v1 +kind: Service +metadata: + name: {{ include "predictionio.fullname" . }} + labels: + app.kubernetes.io/name: {{ include "predictionio.name" . }} + helm.sh/chart: {{ include "predictionio.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +spec: + type: {{ .Values.pio.service.type }} + ports: + - port: {{ .Values.pio.service.port }} + targetPort: 8888 + protocol: TCP + name: jupyter + selector: + app.kubernetes.io/name: {{ include "predictionio.name" . 
}} + app.kubernetes.io/instance: {{ .Release.Name }} diff --git a/docker/charts/predictionio/values.yaml b/docker/charts/predictionio/values.yaml new file mode 100644 index 0000000000..db1d2d9c3c --- /dev/null +++ b/docker/charts/predictionio/values.yaml @@ -0,0 +1,53 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +pio: + replicas: 1 + image: + repository: predictionio/pio-jupyter + tag: latest + pullPolicy: IfNotPresent + service: + type: ClusterIP + port: 8888 + env: + - name: PIO_STORAGE_SOURCES_PGSQL_TYPE + value: jdbc + - name: PIO_STORAGE_SOURCES_PGSQL_URL + value: "jdbc:postgresql://postgresql/pio" + - name: PIO_STORAGE_SOURCES_PGSQL_USERNAME + value: pio + - name: PIO_STORAGE_SOURCES_PGSQL_PASSWORD + value: pio + - name: PIO_STORAGE_REPOSITORIES_MODELDATA_NAME + value: pio_model + - name: PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE + value: PGSQL + - name: PIO_STORAGE_REPOSITORIES_METADATA_NAME + value: pio_meta + - name: PIO_STORAGE_REPOSITORIES_METADATA_SOURCE + value: PGSQL + - name: PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME + value: pio_event + - name: PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE + value: PGSQL + - name: PYSPARK_DRIVER_PYTHON_OPTS + value: "notebook --NotebookApp.token=''" + resources: {} + nodeSelector: {} + tolerations: [] + affinity: {} + diff --git a/docker/charts/predictionio_postgresql.yaml b/docker/charts/predictionio_postgresql.yaml new file mode 100644 index 0000000000..8214d5b720 --- /dev/null +++ b/docker/charts/predictionio_postgresql.yaml @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +pio: + env: + - name: PIO_STORAGE_SOURCES_PGSQL_TYPE + value: jdbc + - name: PIO_STORAGE_SOURCES_PGSQL_URL + value: "jdbc:postgresql://my-postgresql-postgresql:5432/pio" + - name: PIO_STORAGE_SOURCES_PGSQL_USERNAME + value: pio + - name: PIO_STORAGE_SOURCES_PGSQL_PASSWORD + value: pio + - name: PIO_STORAGE_REPOSITORIES_MODELDATA_NAME + value: pio_model + - name: PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE + value: PGSQL + - name: PIO_STORAGE_REPOSITORIES_METADATA_NAME + value: pio_meta + - name: PIO_STORAGE_REPOSITORIES_METADATA_SOURCE + value: PGSQL + - name: PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME + value: pio_event + - name: PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE + value: PGSQL + - name: PYSPARK_DRIVER_PYTHON_OPTS + value: "notebook --NotebookApp.token=''" + diff --git a/docker/charts/spark/.helmignore b/docker/charts/spark/.helmignore new file mode 100644 index 0000000000..4a332ba309 --- /dev/null +++ b/docker/charts/spark/.helmignore @@ -0,0 +1,38 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/docker/charts/spark/Chart.yaml b/docker/charts/spark/Chart.yaml new file mode 100755 index 0000000000..aee7901d58 --- /dev/null +++ b/docker/charts/spark/Chart.yaml @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +name: spark +version: 0.3.0 +appVersion: 2.3.2 +description: Fast and general-purpose cluster computing system. 
+home: http://spark.apache.org +icon: http://spark.apache.org/images/spark-logo-trademark.png +sources: + - https://github.com/kubernetes/kubernetes/tree/master/examples/spark + - https://github.com/apache/spark +maintainers: + - name: lachie83 + email: lachlan.evenson@gmail.com + - name: Shinsuke Sugaya + email: shinsuke@apache.org diff --git a/docker/charts/spark/README.md b/docker/charts/spark/README.md new file mode 100644 index 0000000000..4914f139a5 --- /dev/null +++ b/docker/charts/spark/README.md @@ -0,0 +1,99 @@ + +# Apache Spark Helm Chart + +Apache Spark is a fast and general-purpose cluster computing system. + +* http://spark.apache.org/ + +This chart is based on stable/spark in [Helm Charts](https://github.com/helm/charts). + +## Chart Details +This chart will do the following: + +* 1 x Spark Master with port 8080 exposed on an external LoadBalancer +* 3 x Spark Workers with HorizontalPodAutoscaler to scale to max 10 pods when CPU hits 50% of 100m +* All using Kubernetes Deployments + +## Prerequisites + +* Assumes that serviceAccount tokens are available under hostname metadata. (Works on GKE by default) URL -- http://metadata/computeMetadata/v1/instance/service-accounts/default/token + +## Installing the Chart + +To install the chart with the release name `my-release`: + +```bash +$ helm install --name my-release stable/spark +``` + +## Configuration + +The following table lists the configurable parameters of the Spark chart and their default values. + +### Spark Master + +| Parameter | Description | Default | +| ----------------------- | ---------------------------------- | ---------------------------------------------------------- | +| `Master.Name` | Spark master name | `spark-master` | +| `Master.Image` | Container image name | `bde2020/spark-master` | +| `Master.ImageTag` | Container image tag | `2.2.2-hadoop2.7` | +| `Master.Replicas` | k8s deployment replicas | `1` | +| `Master.Component` | k8s selector key | `spark-master` | +| `Master.Cpu` | container requested cpu | `100m` | +| `Master.Memory` | container requested memory | `512Mi` | +| `Master.ServicePort` | k8s service port | `7077` | +| `Master.ContainerPort` | Container listening port | `7077` | +| `Master.DaemonMemory` | Master JVM Xms and Xmx option | `1g` | +| `Master.ServiceType ` | Kubernetes Service type | `LoadBalancer` | + +### Spark WebUi + +| Parameter | Description | Default | +|-----------------------|----------------------------------|----------------------------------------------------------| +| `WebUi.Name` | Spark webui name | `spark-webui` | +| `WebUi.ServicePort` | k8s service port | `8080` | +| `WebUi.ContainerPort` | Container listening port | `8080` | + +### Spark Worker + +| Parameter | Description | Default | +| ----------------------- | ------------------------------------ | ---------------------------------------------------------- | +| `Worker.Name` | Spark worker name | `spark-worker` | +| `Worker.Image` | Container image name | `bde2020/spark-worker` | +| `Worker.ImageTag` | Container image tag | `2.2.2-hadoop2.7` | +| `Worker.Replicas` | k8s hpa and deployment replicas | `3` | +| `Worker.ReplicasMax` | k8s hpa max replicas | `10` | +| `Worker.Component` | k8s selector key | `spark-worker` | +| `Worker.Cpu` | container requested cpu | `100m` | +| `Worker.Memory` | container requested memory | `512Mi` | +| `Worker.ContainerPort` | Container listening port | `7077` | +| `Worker.CpuTargetPercentage` | k8s hpa cpu targetPercentage | `50` | +| `Worker.DaemonMemory` | Worker JVM Xms and 
Xmx setting | `1g` | +| `Worker.ExecutorMemory` | Worker memory available for executor | `1g` | +| `Worker.Autoscaling` | Enable horizontal pod autoscaling | `false` | + + +Specify each parameter using the `--set key=value[,key=value]` argument to `helm install`. + +Alternatively, a YAML file that specifies the values for the parameters can be provided while installing the chart. For example, + +```bash +$ helm install --name my-release -f values.yaml stable/spark +``` + +> **Tip**: You can use the default [values.yaml](values.yaml) diff --git a/docker/charts/spark/templates/NOTES.txt b/docker/charts/spark/templates/NOTES.txt new file mode 100644 index 0000000000..3611bf57f8 --- /dev/null +++ b/docker/charts/spark/templates/NOTES.txt @@ -0,0 +1,24 @@ +{{/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */}} +1. Get the Spark URL to visit by running these commands in the same shell: + + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of it by running 'kubectl get svc --namespace {{ .Release.Namespace }} -w {{ template "webui-fullname" . }}' + + export SPARK_SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "webui-fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + echo http://$SPARK_SERVICE_IP:{{ .Values.WebUi.ServicePort }} + diff --git a/docker/charts/spark/templates/_helpers.tpl b/docker/charts/spark/templates/_helpers.tpl new file mode 100644 index 0000000000..066c441786 --- /dev/null +++ b/docker/charts/spark/templates/_helpers.tpl @@ -0,0 +1,43 @@ +{{/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */}} +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create fully qualified names. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
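For reviewers who want to exercise the new chart locally, here is a minimal sketch (not part of the patch itself) of installing it from this repository checkout while overriding a few of the parameters documented in the README above; the release name `my-release` and the chosen values are purely illustrative:

```bash
# Helm 2 style install (matching the README's --name usage), pointing at the
# chart directory added by this change and overriding a few defaults.
helm install --name my-release docker/charts/spark \
  --set Worker.Replicas=5 \
  --set Worker.ExecutorMemory=2g \
  --set Master.ServiceType=ClusterIP

# The worker Deployment is labelled component=<release>-spark-worker,
# so its pods can be watched while they come up.
kubectl get pods -l component=my-release-spark-worker -w
```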
+*/}} +{{- define "master-fullname" -}} +{{- $name := default .Chart.Name .Values.Master.Name -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "webui-fullname" -}} +{{- $name := default .Chart.Name .Values.WebUi.Name -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "worker-fullname" -}} +{{- $name := default .Chart.Name .Values.Worker.Name -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} + diff --git a/docker/charts/spark/templates/spark-master-deployment.yaml b/docker/charts/spark/templates/spark-master-deployment.yaml new file mode 100644 index 0000000000..5c05ebf7f5 --- /dev/null +++ b/docker/charts/spark/templates/spark-master-deployment.yaml @@ -0,0 +1,95 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: v1 +kind: Service +metadata: + name: {{ template "master-fullname" . }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + component: "{{ .Release.Name }}-{{ .Values.Master.Component }}" +spec: + ports: + - port: {{ .Values.Master.ServicePort }} + targetPort: {{ .Values.Master.ContainerPort }} + selector: + component: "{{ .Release.Name }}-{{ .Values.Master.Component }}" + type: {{ .Values.Master.ServiceType }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "webui-fullname" . }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + component: "{{ .Release.Name }}-{{ .Values.Master.Component }}" +spec: + ports: + - port: {{ .Values.WebUi.ServicePort }} + targetPort: {{ .Values.WebUi.ContainerPort }} + selector: + component: "{{ .Release.Name }}-{{ .Values.Master.Component }}" + type: {{ .Values.WebUi.ServiceType }} +--- +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: {{ template "master-fullname" . }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + component: "{{ .Release.Name }}-{{ .Values.Master.Component }}" +spec: + replicas: {{ default 1 .Values.Master.Replicas }} + strategy: + type: RollingUpdate + selector: + matchLabels: + component: "{{ .Release.Name }}-{{ .Values.Master.Component }}" + template: + metadata: + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + component: "{{ .Release.Name }}-{{ .Values.Master.Component }}" + spec: + containers: + - name: {{ template "master-fullname" . 
}} + image: "{{ .Values.Master.Image }}:{{ .Values.Master.ImageTag }}" + command: ["/bin/sh","-c"] + args: ["echo $(hostname -i) {{ template "master-fullname" . }} >> /etc/hosts; {{ .Values.Spark.Path }}/bin/spark-class org.apache.spark.deploy.master.Master"] + ports: + - containerPort: {{ .Values.Master.ContainerPort }} + - containerPort: {{ .Values.WebUi.ContainerPort }} + resources: + requests: + cpu: "{{ .Values.Master.Cpu }}" + memory: "{{ .Values.Master.Memory }}" + env: + - name: SPARK_DAEMON_MEMORY + value: {{ default "1g" .Values.Master.DaemonMemory | quote }} + - name: SPARK_MASTER_HOST + value: {{ template "master-fullname" . }} + - name: SPARK_MASTER_PORT + value: {{ .Values.Master.ServicePort | quote }} + - name: SPARK_MASTER_WEBUI_PORT + value: {{ .Values.WebUi.ContainerPort | quote }} diff --git a/docker/charts/spark/templates/spark-sql-test.yaml b/docker/charts/spark/templates/spark-sql-test.yaml new file mode 100644 index 0000000000..d1a5cae33c --- /dev/null +++ b/docker/charts/spark/templates/spark-sql-test.yaml @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: v1 +kind: Pod +metadata: + name: "{{ .Release.Name }}-sql-test-{{ randAlphaNum 5 | lower }}" + annotations: + "helm.sh/hook": test-success +spec: + containers: + - name: {{ .Release.Name }}-sql-test + image: {{ .Values.Master.Image }}:{{ .Values.Master.ImageTag }} + command: ["{{ .Values.Spark.Path }}/bin/spark-sql", "--master", + "spark://{{ .Release.Name }}-master:{{ .Values.Master.ServicePort }}", "-e", + "show databases;"] + restartPolicy: Never diff --git a/docker/charts/spark/templates/spark-worker-deployment.yaml b/docker/charts/spark/templates/spark-worker-deployment.yaml new file mode 100644 index 0000000000..a44cc5302b --- /dev/null +++ b/docker/charts/spark/templates/spark-worker-deployment.yaml @@ -0,0 +1,57 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: {{ template "worker-fullname" . 
}} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + component: "{{ .Release.Name }}-{{ .Values.Worker.Component }}" +spec: + replicas: {{ default 1 .Values.Worker.Replicas }} + strategy: + type: RollingUpdate + selector: + matchLabels: + component: "{{ .Release.Name }}-{{ .Values.Worker.Component }}" + template: + metadata: + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + component: "{{ .Release.Name }}-{{ .Values.Worker.Component }}" + spec: + containers: + - name: {{ template "worker-fullname" . }} + image: "{{ .Values.Worker.Image }}:{{ .Values.Worker.ImageTag }}" + command: ["{{ .Values.Spark.Path }}/bin/spark-class", "org.apache.spark.deploy.worker.Worker", "spark://{{ template "master-fullname" . }}:{{ .Values.Master.ServicePort }}"] + ports: + - containerPort: {{ .Values.Worker.ContainerPort }} + resources: + requests: + cpu: "{{ .Values.Worker.Cpu }}" + memory: "{{ .Values.Worker.Memory }}" + env: + - name: SPARK_DAEMON_MEMORY + value: {{ default "1g" .Values.Worker.DaemonMemory | quote }} + - name: SPARK_WORKER_MEMORY + value: {{ default "1g" .Values.Worker.ExecutorMemory | quote }} + - name: SPARK_WORKER_WEBUI_PORT + value: {{ .Values.WebUi.ContainerPort | quote }} diff --git a/docker/charts/spark/templates/spark-worker-hpa.yaml b/docker/charts/spark/templates/spark-worker-hpa.yaml new file mode 100644 index 0000000000..68c7d5db3b --- /dev/null +++ b/docker/charts/spark/templates/spark-worker-hpa.yaml @@ -0,0 +1,39 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +{{- if .Values.Worker.Autoscaling.Enabled }} +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + component: "{{ .Release.Name }}-{{ .Values.Worker.Component }}" + name: {{ template "worker-fullname" . }} +spec: + scaleTargetRef: + apiVersion: apps/v1beta1 + kind: Deployment + name: {{ template "worker-fullname" . }} + minReplicas: {{ .Values.Worker.Replicas }} + maxReplicas: {{ .Values.Worker.ReplicasMax }} + metrics: + - type: Resource + resource: + name: cpu + targetAverageUtilization: {{ .Values.Worker.CpuTargetPercentage }} +{{- end }} diff --git a/docker/charts/spark/values.yaml b/docker/charts/spark/values.yaml new file mode 100644 index 0000000000..c366aa11dc --- /dev/null +++ b/docker/charts/spark/values.yaml @@ -0,0 +1,63 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Default values for spark. +# This is a YAML-formatted file. +# Declare name/value pairs to be passed into your templates. +# name: value + +Spark: + Path: "/spark" + +Master: + Name: master + Image: "bde2020/spark-master" + ImageTag: "2.2.2-hadoop2.7" + Replicas: 1 + Component: "spark-master" + Cpu: "100m" + Memory: "512Mi" + ServicePort: 7077 + ContainerPort: 7077 + # Set Master JVM memory. Default 1g + # DaemonMemory: 1g + ServiceType: LoadBalancer + +WebUi: + Name: webui + ServicePort: 8080 + ContainerPort: 8080 + ServiceType: LoadBalancer + +Worker: + Name: worker + Image: "bde2020/spark-worker" + ImageTag: "2.2.2-hadoop2.7" + Replicas: 3 + Component: "spark-worker" + Cpu: "100m" + Memory: "512Mi" + ContainerPort: 8081 + # Set Worker JVM memory. Default 1g + # DaemonMemory: 1g + # Set how much total memory workers have to give executors + # ExecutorMemory: 1g + Autoscaling: + Enabled: false + ReplicasMax: 10 + CpuTargetPercentage: 50 + diff --git a/docker/pio/Dockerfile b/docker/pio/Dockerfile index 465bb07aea..0d454184b6 100644 --- a/docker/pio/Dockerfile +++ b/docker/pio/Dockerfile @@ -56,7 +56,7 @@ RUN curl -o $PIO_HOME/lib/postgresql-$PGSQL_VERSION.jar \ WORKDIR /usr/share RUN curl -o /opt/src/spark-$SPARK_VERSION.tgz \ - http://www-us.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \ + http://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz && \ tar zxvf /opt/src/spark-$SPARK_VERSION.tgz && \ echo "SPARK_HOME="`pwd`/`ls -d spark*` >> /etc/predictionio/pio-env.sh && \ rm -rf /opt/src diff --git a/docs/manual/data/versions.yml b/docs/manual/data/versions.yml index 54fa46968b..432fa4def8 100644 --- a/docs/manual/data/versions.yml +++ b/docs/manual/data/versions.yml @@ -1,7 +1,7 @@ pio: 0.14.0 spark: 2.4.0 spark_download_filename: spark-2.4.0-bin-hadoop2.7 -elasticsearch_download_filename: elasticsearch-5.6.9 +elasticsearch_download_filename: elasticsearch-6.8.1 hbase_version: 1.2.6 hbase_basename: hbase-1.2.6 hbase_variant: bin diff --git a/docs/manual/source/install/config-datastore.html.md b/docs/manual/source/install/config-datastore.html.md deleted file mode 100644 index eb290c52c7..0000000000 --- a/docs/manual/source/install/config-datastore.html.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Changing Data Store ---- - - - -Changing Storage Setup -=================== - -PredictionIO relies on a data store to store its metadata. At the moment, PredictionIO's storage layer supports [Elasticsearch](http://www.elasticsearch.org/). Make sure you have it running and functioning properly on your computer. - -1. If you are using Elasticsearch at the localhost and its default settings, you may stop here. - -2. Otherwise, change the following in `conf/pio-env.sh` to fit your setup. 
- - ``` - PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch - PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=localhost - PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9300 - ``` - - -Save ``conf/pio-env.sh`` and you are done! diff --git a/docs/manual/source/install/index.html.md.erb b/docs/manual/source/install/index.html.md.erb index 7da2a948ff..9d5240919e 100644 --- a/docs/manual/source/install/index.html.md.erb +++ b/docs/manual/source/install/index.html.md.erb @@ -21,55 +21,34 @@ limitations under the License. ## Prerequisites -It is **very important** to meet the minimum version of the following +It is **very important** to meet the version of the following technologies that power Apache PredictionIO®. -* Apache Hadoop 2.6.5 (optional, required only if YARN and HDFS are needed) -* Apache Spark 2.0.2 for Hadoop 2.6 * Java SE Development Kit 8 +* Apache Spark 2.0+ +* Apache Hadoop 2.6, 2.7 and one of the following sets: -* PostgreSQL 9.1 - -or - -* MySQL 5.1 - -or - -* Apache HBase 0.98.5 -* Elasticsearch 1.7.6 - -WARNING: **Note that support for Scala 2.10 and Spark 1.6 were removed as of PredictionIO 0.14.0. -Note that support for Elasticsearch 1 is deprecated as of PredictionIO 0.14.0.** - -If you are running on a single machine, we recommend a minimum of 2GB memory. - -INFO: If you are using Linux, Apache Spark local mode, which is the default -operation mode without further configuration, may not work. In that case, -configure your Apache Spark to run in [standalone cluster -mode](http://spark.apache.org/docs/latest/spark-standalone.html). +* PostgreSQL 9.6 or MySQL 5.1 +* Apache HBase 1.2 +* Elasticsearch 6.x, 5.6(deprecated) ## Installation -* [Installing Apache PredictionIO](install-sourcecode.html) +Pre-built for the following versions -You may also use Docker to install Apache PredictionIO® +* Scala 2.11 +* Apache Spark 2.4 +* Apache Hadoop 2.7 +* Elasticsearch 6.8 -* [Installing Apache PredictionIO with Docker](install-docker.html) - - -[//]: # (* *(coming soon)* Installing Apache PredictionIO with Homebrew) +* [Downloading Binary Distribution](install-sourcecode.html#downloading-binary-distribution) +Building Apache PredictionIO +* [Downloading Source Code](install-sourcecode.html#downloading-source-code) -WARNING: **0.8.2 contains schema changes from the previous versions, if you have -installed the previous versions, you may need to clear both HBase and -Elasticsearch. See more [here](/resources/upgrade/).** +Docker - -[//]: # (## Production Deployment) - -[//]: # (For production environment setup, please refer to [Production) -[//]: # (Deployment](/production/deploy.html) guide.) +* [Installing Apache PredictionIO with Docker](install-docker.html) diff --git a/docs/manual/source/install/install-sourcecode.html.md.erb b/docs/manual/source/install/install-sourcecode.html.md.erb index fe5de3d92a..f2bce2419c 100644 --- a/docs/manual/source/install/install-sourcecode.html.md.erb +++ b/docs/manual/source/install/install-sourcecode.html.md.erb @@ -24,14 +24,6 @@ replace `/home/abc` with your own home directory wherever you see it. ## Downloading Binary Distribution -You can use pre-built binary distribution for Apache PredictionIO® if you are -building against - -* Scala 2.11.12 -* Spark 2.1.3 -* Hadoop 2.7.7 -* Elasticsearch 5.6.9 - Download [binary release from an Apache mirror](https://www.apache.org/dyn/closer.lua/predictionio/<%= data.versions.pio %>/apache-predictionio-<%= data.versions.pio %>-bin.tar.gz). @@ -100,9 +92,14 @@ and complete information. 
### Building Run the following at the directory where you downloaded the source code to build -Apache PredictionIO®. -As an example, if you want to build PredictionIO to support Scala 2.11.12, -Spark 2.4.0, and Elasticsearch 6.4.2, you can do + +Apache PredictionIO®. By default, the build will be against + +* Scala 2.11.8 +* Spark 2.1.1 +* Hadoop 2.7.7 +* Elasticsearch 5.6.9 + ``` $ tar zxvf apache-predictionio-<%= data.versions.pio %>.tar.gz @@ -127,17 +124,6 @@ Extract the binary distribution you have just built. $ tar zxvf PredictionIO-<%= data.versions.pio %>.tar.gz ``` -### Building against Different Versions of Dependencies - -Starting from version 0.11.0, PredictionIO can be built against different -versions of dependencies. As of writing, one could build PredictionIO against -these different dependencies: - -* Scala 2.11.x -* Spark 2.0.x, 2.1.x, 2.2.x, 2.3.x, 2.4.x -* Hadoop 2.6.x, 2.7.x -* Elasticsearch 1.7.x(deprecated), 5.6.x, 6.x - ## Installing Dependencies Let us install dependencies inside a subdirectory of the Apache PredictionIO diff --git a/docs/manual/source/partials/shared/install/_elasticsearch.html.erb b/docs/manual/source/partials/shared/install/_elasticsearch.html.erb index 8d4e24c7f7..1d6242b4ad 100644 --- a/docs/manual/source/partials/shared/install/_elasticsearch.html.erb +++ b/docs/manual/source/partials/shared/install/_elasticsearch.html.erb @@ -45,5 +45,5 @@ setup. ``` PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=localhost -PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9300 +PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200 ``` diff --git a/docs/manual/source/partials/shared/install/_postgres.html.erb b/docs/manual/source/partials/shared/install/_postgres.html.erb index a2e6e99b77..3de4f3a799 100644 --- a/docs/manual/source/partials/shared/install/_postgres.html.erb +++ b/docs/manual/source/partials/shared/install/_postgres.html.erb @@ -54,6 +54,5 @@ $ psql -c "create user pio with password 'pio'" Starting from 0.11.0, PredictionIO no longer bundles JDBC drivers. Download the PostgreSQL JDBC driver from the [official web site](https://jdbc.postgresql.org/), and put the JAR file in the `lib` -subdirectory. By default, `conf/pio-env.sh` assumes version 42.0.0 JDBC 4.2. If -you use a different version, modify `POSTGRES_JDBC_DRIVER` to point to the -correct JAR. +subdirectory. Afterwards, you need to edit `conf/pio-env.sh` and change the +`POSTGRES_JDBC_DRIVER` variable to point to the correct JAR. diff --git a/examples/redeploy-script/redeploy.sh b/examples/redeploy-script/redeploy.sh index 157afd9344..a715229446 100755 --- a/examples/redeploy-script/redeploy.sh +++ b/examples/redeploy-script/redeploy.sh @@ -155,6 +155,10 @@ if [[ $TRAIN_RESULT -ne 0 ]]; then fi # Deploy +# Get current running instance PID +PIDBYPORT_COMMAND="lsof -t -i:$PORT" +DEPLOYEDPID=$($PIDBYPORT_COMMAND) + DEPLOY_LOG=`mktemp $LOG_DIR/tmp.XXXXXXXXXX` $($DEPLOY_COMMAND 1>$DEPLOY_LOG 2>&1) & @@ -170,6 +174,16 @@ while [[ $RETURN_VAL -ne 0 && $COUNTER -lt 20 ]]; do let COUNTER=COUNTER+1 done +# Check if the previous engine instance is running +KILLSD_COMMAND="kill $DEPLOYEDPID" +if [ -z "$DEPLOYEDPID" ] +then + printf "\nNo stale PIDs found for port $PORT\n" +else + $($KILLSD_COMMAND) + printf "\nStale PID found as $DEPLOYEDPID.
Resources released.\n" +fi + cat $DEPLOY_LOG >> $LOG_FILE rm $DEPLOY_LOG echo -n "Deploy ended with return value $TRAIN_RESULT at " | tee -a $LOG_FILE diff --git a/examples/scala-parallel-classification/README.md b/examples/scala-parallel-classification/README.md index 96735bae25..a19ee429ff 100644 --- a/examples/scala-parallel-classification/README.md +++ b/examples/scala-parallel-classification/README.md @@ -17,4 +17,4 @@ limitations under the License. This is based on Classification Engine Template v0.14.0. -Please refer to http://predictionio.apache.org/templates/classification/how-to/ +Please refer to https://predictionio.apache.org/templates/classification/how-to/ diff --git a/examples/scala-parallel-classification/add-algorithm/src/main/scala/NaiveBayesAlgorithm.scala b/examples/scala-parallel-classification/add-algorithm/src/main/scala/NaiveBayesAlgorithm.scala index 0ac5e5b478..4c86bdcf40 100644 --- a/examples/scala-parallel-classification/add-algorithm/src/main/scala/NaiveBayesAlgorithm.scala +++ b/examples/scala-parallel-classification/add-algorithm/src/main/scala/NaiveBayesAlgorithm.scala @@ -37,6 +37,7 @@ class NaiveBayesAlgorithm(val ap: AlgorithmParams) @transient lazy val logger = Logger[this.type] + override def train(sc: SparkContext, data: PreparedData): NaiveBayesModel = { // MLLib NaiveBayes cannot handle empty training data. require(data.labeledPoints.take(1).nonEmpty, @@ -47,6 +48,7 @@ class NaiveBayesAlgorithm(val ap: AlgorithmParams) NaiveBayes.train(data.labeledPoints, ap.lambda) } + override def predict(model: NaiveBayesModel, query: Query): PredictedResult = { val label = model.predict(Vectors.dense( Array(query.attr0, query.attr1, query.attr2) diff --git a/examples/scala-parallel-classification/add-algorithm/src/main/scala/PrecisionEvaluation.scala b/examples/scala-parallel-classification/add-algorithm/src/main/scala/PrecisionEvaluation.scala index 019e2d8918..addb21cacb 100644 --- a/examples/scala-parallel-classification/add-algorithm/src/main/scala/PrecisionEvaluation.scala +++ b/examples/scala-parallel-classification/add-algorithm/src/main/scala/PrecisionEvaluation.scala @@ -25,6 +25,7 @@ case class Precision(label: Double) extends OptionAverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] { override def header: String = s"Precision(label = $label)" + override def calculate(query: Query, predicted: PredictedResult, actual: ActualResult) : Option[Double] = { if (predicted.label == label) { diff --git a/examples/scala-parallel-classification/add-algorithm/src/main/scala/Preparator.scala b/examples/scala-parallel-classification/add-algorithm/src/main/scala/Preparator.scala index 20d8f8c2c6..7314906348 100644 --- a/examples/scala-parallel-classification/add-algorithm/src/main/scala/Preparator.scala +++ b/examples/scala-parallel-classification/add-algorithm/src/main/scala/Preparator.scala @@ -29,6 +29,7 @@ class PreparedData( class Preparator extends PPreparator[TrainingData, PreparedData] { + override def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { new PreparedData(trainingData.labeledPoints) } diff --git a/examples/scala-parallel-classification/add-algorithm/src/main/scala/RandomForestAlgorithm.scala b/examples/scala-parallel-classification/add-algorithm/src/main/scala/RandomForestAlgorithm.scala index 76dd7ca68d..f28d9547da 100644 --- a/examples/scala-parallel-classification/add-algorithm/src/main/scala/RandomForestAlgorithm.scala +++ 
b/examples/scala-parallel-classification/add-algorithm/src/main/scala/RandomForestAlgorithm.scala @@ -42,6 +42,7 @@ class RandomForestAlgorithm(val ap: RandomForestAlgorithmParams) // CHANGED Query, PredictedResult] { // CHANGED + override def train(sc: SparkContext, data: PreparedData): RandomForestModel = { // CHANGED // Empty categoricalFeaturesInfo indicates all features are continuous. @@ -57,6 +58,7 @@ class RandomForestAlgorithm(val ap: RandomForestAlgorithmParams) // CHANGED ap.maxBins) } + override def predict( model: RandomForestModel, // CHANGED query: Query): PredictedResult = { diff --git a/examples/scala-parallel-classification/reading-custom-properties/src/main/scala/NaiveBayesAlgorithm.scala b/examples/scala-parallel-classification/reading-custom-properties/src/main/scala/NaiveBayesAlgorithm.scala index 6625551268..8ee2f53372 100644 --- a/examples/scala-parallel-classification/reading-custom-properties/src/main/scala/NaiveBayesAlgorithm.scala +++ b/examples/scala-parallel-classification/reading-custom-properties/src/main/scala/NaiveBayesAlgorithm.scala @@ -37,6 +37,7 @@ class NaiveBayesAlgorithm(val ap: AlgorithmParams) @transient lazy val logger = Logger[this.type] + override def train(sc: SparkContext, data: PreparedData): NaiveBayesModel = { // MLLib NaiveBayes cannot handle empty training data. require(data.labeledPoints.take(1).nonEmpty, @@ -47,6 +48,7 @@ class NaiveBayesAlgorithm(val ap: AlgorithmParams) NaiveBayes.train(data.labeledPoints, ap.lambda) } + override def predict(model: NaiveBayesModel, query: Query): PredictedResult = { val label = model.predict(Vectors.dense( // MODIFIED diff --git a/examples/scala-parallel-classification/reading-custom-properties/src/main/scala/PrecisionEvaluation.scala b/examples/scala-parallel-classification/reading-custom-properties/src/main/scala/PrecisionEvaluation.scala index 019e2d8918..addb21cacb 100644 --- a/examples/scala-parallel-classification/reading-custom-properties/src/main/scala/PrecisionEvaluation.scala +++ b/examples/scala-parallel-classification/reading-custom-properties/src/main/scala/PrecisionEvaluation.scala @@ -25,6 +25,7 @@ case class Precision(label: Double) extends OptionAverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] { override def header: String = s"Precision(label = $label)" + override def calculate(query: Query, predicted: PredictedResult, actual: ActualResult) : Option[Double] = { if (predicted.label == label) { diff --git a/examples/scala-parallel-classification/reading-custom-properties/src/main/scala/Preparator.scala b/examples/scala-parallel-classification/reading-custom-properties/src/main/scala/Preparator.scala index 20d8f8c2c6..7314906348 100644 --- a/examples/scala-parallel-classification/reading-custom-properties/src/main/scala/Preparator.scala +++ b/examples/scala-parallel-classification/reading-custom-properties/src/main/scala/Preparator.scala @@ -29,6 +29,7 @@ class PreparedData( class Preparator extends PPreparator[TrainingData, PreparedData] { + override def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { new PreparedData(trainingData.labeledPoints) } diff --git a/examples/scala-parallel-ecommercerecommendation/README.md b/examples/scala-parallel-ecommercerecommendation/README.md index b80c928815..60ff56060a 100644 --- a/examples/scala-parallel-ecommercerecommendation/README.md +++ b/examples/scala-parallel-ecommercerecommendation/README.md @@ -17,4 +17,4 @@ limitations under the License. 
This is based on E-Commerce Recommendation Template v0.14.0. -Please refer to http://predictionio.apache.org/templates/ecommercerecommendation/how-to/ +Please refer to https://predictionio.apache.org/templates/ecommercerecommendation/how-to/ diff --git a/examples/scala-parallel-ecommercerecommendation/adjust-score/src/main/scala/ECommAlgorithm.scala b/examples/scala-parallel-ecommercerecommendation/adjust-score/src/main/scala/ECommAlgorithm.scala index d63b09086c..b2643ea8bd 100644 --- a/examples/scala-parallel-ecommercerecommendation/adjust-score/src/main/scala/ECommAlgorithm.scala +++ b/examples/scala-parallel-ecommercerecommendation/adjust-score/src/main/scala/ECommAlgorithm.scala @@ -87,6 +87,7 @@ class ECommAlgorithm(val ap: ECommAlgorithmParams) @transient lazy val logger = Logger[this.type] + override def train(sc: SparkContext, data: PreparedData): ECommModel = { require(!data.viewEvents.take(1).isEmpty, s"viewEvents in PreparedData cannot be empty." + @@ -239,6 +240,7 @@ class ECommAlgorithm(val ap: ECommAlgorithmParams) buyCountsRDD.collectAsMap.toMap } + override def predict(model: ECommModel, query: Query): PredictedResult = { val userFeatures = model.userFeatures diff --git a/examples/scala-parallel-ecommercerecommendation/adjust-score/src/main/scala/Preparator.scala b/examples/scala-parallel-ecommercerecommendation/adjust-score/src/main/scala/Preparator.scala index 585aaea251..7862adde23 100644 --- a/examples/scala-parallel-ecommercerecommendation/adjust-score/src/main/scala/Preparator.scala +++ b/examples/scala-parallel-ecommercerecommendation/adjust-score/src/main/scala/Preparator.scala @@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD class Preparator extends PPreparator[TrainingData, PreparedData] { + override def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { new PreparedData( users = trainingData.users, diff --git a/examples/scala-parallel-recommendation/README.md b/examples/scala-parallel-recommendation/README.md index bd99531972..ab1c2ec7a5 100644 --- a/examples/scala-parallel-recommendation/README.md +++ b/examples/scala-parallel-recommendation/README.md @@ -17,4 +17,4 @@ limitations under the License. This is based on Recommendation Template v0.14.0. -Please refer to http://predictionio.apache.org/templates/recommendation/how-to/ +Please refer to https://predictionio.apache.org/templates/recommendation/how-to/ diff --git a/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/ALSAlgorithm.scala b/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/ALSAlgorithm.scala index d500d673e6..c155b534b3 100644 --- a/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/ALSAlgorithm.scala +++ b/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/ALSAlgorithm.scala @@ -48,6 +48,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) s"To remedy it, set lower numIterations or checkpoint parameters.") } + override def train(sc: SparkContext, data: PreparedData): ALSModel = { // MLLib ALS cannot handle empty training data. 
require(!data.ratings.take(1).isEmpty, @@ -92,6 +93,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) itemStringIntMap = itemStringIntMap) } + override def predict(model: ALSModel, query: Query): PredictedResult = { // Convert String ID to Int index for Mllib model.userStringIntMap.get(query.user).map { userInt => diff --git a/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/ALSModel.scala b/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/ALSModel.scala index f3c881ea9f..ec459f3a47 100644 --- a/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/ALSModel.scala +++ b/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/ALSModel.scala @@ -59,6 +59,7 @@ class ALSModel( scored.top(num)(Ordering.by(_._2)) } + override def save(id: String, params: ALSAlgorithmParams, sc: SparkContext): Boolean = { diff --git a/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/Evaluation.scala b/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/Evaluation.scala index a6654967d9..3f1dc7859b 100644 --- a/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/Evaluation.scala +++ b/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/Evaluation.scala @@ -35,6 +35,7 @@ case class PrecisionAtK(k: Int, ratingThreshold: Double = 2.0) override def header = s"Precision@K (k=$k, threshold=$ratingThreshold)" + override def calculate(q: Query, p: PredictedResult, a: ActualResult): Option[Double] = { val positives: Set[String] = a.ratings.filter(_.rating >= ratingThreshold).map(_.item).toSet @@ -53,6 +54,7 @@ case class PositiveCount(ratingThreshold: Double = 2.0) extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] { override def header = s"PositiveCount (threshold=$ratingThreshold)" + override def calculate(q: Query, p: PredictedResult, a: ActualResult): Double = { a.ratings.filter(_.rating >= ratingThreshold).size } diff --git a/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/Preparator.scala b/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/Preparator.scala index 6a41c47c14..af7e744aef 100644 --- a/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/Preparator.scala +++ b/examples/scala-parallel-recommendation/blacklist-items/src/main/scala/Preparator.scala @@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD class Preparator extends PPreparator[TrainingData, PreparedData] { + override def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { new PreparedData(ratings = trainingData.ratings) } diff --git a/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/ALSAlgorithm.scala b/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/ALSAlgorithm.scala index 65f2f15ba4..13230fd98c 100644 --- a/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/ALSAlgorithm.scala +++ b/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/ALSAlgorithm.scala @@ -48,6 +48,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) s"To remedy it, set lower numIterations or checkpoint parameters.") } + override def train(sc: SparkContext, data: PreparedData): ALSModel = { // MLLib ALS cannot handle empty training data. 
require(!data.ratings.take(1).isEmpty, @@ -92,6 +93,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) itemStringIntMap = itemStringIntMap) } + override def predict(model: ALSModel, query: Query): PredictedResult = { // Convert String ID to Int index for Mllib model.userStringIntMap.get(query.user).map { userInt => diff --git a/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/ALSModel.scala b/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/ALSModel.scala index 898858dbc5..164781c629 100644 --- a/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/ALSModel.scala +++ b/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/ALSModel.scala @@ -39,6 +39,7 @@ class ALSModel( extends MatrixFactorizationModel(rank, userFeatures, productFeatures) with PersistentModel[ALSAlgorithmParams] { + override def save(id: String, params: ALSAlgorithmParams, sc: SparkContext): Boolean = { diff --git a/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/Evaluation.scala b/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/Evaluation.scala index a6654967d9..3f1dc7859b 100644 --- a/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/Evaluation.scala +++ b/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/Evaluation.scala @@ -35,6 +35,7 @@ case class PrecisionAtK(k: Int, ratingThreshold: Double = 2.0) override def header = s"Precision@K (k=$k, threshold=$ratingThreshold)" + override def calculate(q: Query, p: PredictedResult, a: ActualResult): Option[Double] = { val positives: Set[String] = a.ratings.filter(_.rating >= ratingThreshold).map(_.item).toSet @@ -53,6 +54,7 @@ case class PositiveCount(ratingThreshold: Double = 2.0) extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] { override def header = s"PositiveCount (threshold=$ratingThreshold)" + override def calculate(q: Query, p: PredictedResult, a: ActualResult): Double = { a.ratings.filter(_.rating >= ratingThreshold).size } diff --git a/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/Preparator.scala b/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/Preparator.scala index cf792afb51..5c09b7a3d0 100644 --- a/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/Preparator.scala +++ b/examples/scala-parallel-recommendation/customize-data-prep/src/main/scala/Preparator.scala @@ -34,6 +34,7 @@ case class CustomPreparatorParams( class Preparator(pp: CustomPreparatorParams) // ADDED CustomPreparatorParams extends PPreparator[TrainingData, PreparedData] { + override def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { val noTrainItems = Source.fromFile(pp.filepath).getLines.toSet // CHANGED val ratings = trainingData.ratings.filter( r => diff --git a/examples/scala-parallel-recommendation/customize-serving/src/main/scala/ALSAlgorithm.scala b/examples/scala-parallel-recommendation/customize-serving/src/main/scala/ALSAlgorithm.scala index 65f2f15ba4..13230fd98c 100644 --- a/examples/scala-parallel-recommendation/customize-serving/src/main/scala/ALSAlgorithm.scala +++ b/examples/scala-parallel-recommendation/customize-serving/src/main/scala/ALSAlgorithm.scala @@ -48,6 +48,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) s"To remedy it, set lower numIterations or checkpoint parameters.") } + override def train(sc: SparkContext, data: PreparedData): ALSModel = { 
// MLLib ALS cannot handle empty training data. require(!data.ratings.take(1).isEmpty, @@ -92,6 +93,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) itemStringIntMap = itemStringIntMap) } + override def predict(model: ALSModel, query: Query): PredictedResult = { // Convert String ID to Int index for Mllib model.userStringIntMap.get(query.user).map { userInt => diff --git a/examples/scala-parallel-recommendation/customize-serving/src/main/scala/ALSModel.scala b/examples/scala-parallel-recommendation/customize-serving/src/main/scala/ALSModel.scala index 898858dbc5..164781c629 100644 --- a/examples/scala-parallel-recommendation/customize-serving/src/main/scala/ALSModel.scala +++ b/examples/scala-parallel-recommendation/customize-serving/src/main/scala/ALSModel.scala @@ -39,6 +39,7 @@ class ALSModel( extends MatrixFactorizationModel(rank, userFeatures, productFeatures) with PersistentModel[ALSAlgorithmParams] { + override def save(id: String, params: ALSAlgorithmParams, sc: SparkContext): Boolean = { diff --git a/examples/scala-parallel-recommendation/customize-serving/src/main/scala/Evaluation.scala b/examples/scala-parallel-recommendation/customize-serving/src/main/scala/Evaluation.scala index a6654967d9..3f1dc7859b 100644 --- a/examples/scala-parallel-recommendation/customize-serving/src/main/scala/Evaluation.scala +++ b/examples/scala-parallel-recommendation/customize-serving/src/main/scala/Evaluation.scala @@ -35,6 +35,7 @@ case class PrecisionAtK(k: Int, ratingThreshold: Double = 2.0) override def header = s"Precision@K (k=$k, threshold=$ratingThreshold)" + override def calculate(q: Query, p: PredictedResult, a: ActualResult): Option[Double] = { val positives: Set[String] = a.ratings.filter(_.rating >= ratingThreshold).map(_.item).toSet @@ -53,6 +54,7 @@ case class PositiveCount(ratingThreshold: Double = 2.0) extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] { override def header = s"PositiveCount (threshold=$ratingThreshold)" + override def calculate(q: Query, p: PredictedResult, a: ActualResult): Double = { a.ratings.filter(_.rating >= ratingThreshold).size } diff --git a/examples/scala-parallel-recommendation/customize-serving/src/main/scala/Preparator.scala b/examples/scala-parallel-recommendation/customize-serving/src/main/scala/Preparator.scala index 6a41c47c14..af7e744aef 100644 --- a/examples/scala-parallel-recommendation/customize-serving/src/main/scala/Preparator.scala +++ b/examples/scala-parallel-recommendation/customize-serving/src/main/scala/Preparator.scala @@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD class Preparator extends PPreparator[TrainingData, PreparedData] { + override def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { new PreparedData(ratings = trainingData.ratings) } diff --git a/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/ALSAlgorithm.scala b/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/ALSAlgorithm.scala index 65f2f15ba4..13230fd98c 100644 --- a/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/ALSAlgorithm.scala +++ b/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/ALSAlgorithm.scala @@ -48,6 +48,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) s"To remedy it, set lower numIterations or checkpoint parameters.") } + override def train(sc: SparkContext, data: PreparedData): ALSModel = { // MLLib ALS cannot handle empty training data. 
require(!data.ratings.take(1).isEmpty, @@ -92,6 +93,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) itemStringIntMap = itemStringIntMap) } + override def predict(model: ALSModel, query: Query): PredictedResult = { // Convert String ID to Int index for Mllib model.userStringIntMap.get(query.user).map { userInt => diff --git a/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/ALSModel.scala b/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/ALSModel.scala index 898858dbc5..164781c629 100644 --- a/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/ALSModel.scala +++ b/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/ALSModel.scala @@ -39,6 +39,7 @@ class ALSModel( extends MatrixFactorizationModel(rank, userFeatures, productFeatures) with PersistentModel[ALSAlgorithmParams] { + override def save(id: String, params: ALSAlgorithmParams, sc: SparkContext): Boolean = { diff --git a/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/Evaluation.scala b/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/Evaluation.scala index a6654967d9..3f1dc7859b 100644 --- a/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/Evaluation.scala +++ b/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/Evaluation.scala @@ -35,6 +35,7 @@ case class PrecisionAtK(k: Int, ratingThreshold: Double = 2.0) override def header = s"Precision@K (k=$k, threshold=$ratingThreshold)" + override def calculate(q: Query, p: PredictedResult, a: ActualResult): Option[Double] = { val positives: Set[String] = a.ratings.filter(_.rating >= ratingThreshold).map(_.item).toSet @@ -53,6 +54,7 @@ case class PositiveCount(ratingThreshold: Double = 2.0) extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] { override def header = s"PositiveCount (threshold=$ratingThreshold)" + override def calculate(q: Query, p: PredictedResult, a: ActualResult): Double = { a.ratings.filter(_.rating >= ratingThreshold).size } diff --git a/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/Preparator.scala b/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/Preparator.scala index 6a41c47c14..af7e744aef 100644 --- a/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/Preparator.scala +++ b/examples/scala-parallel-recommendation/reading-custom-events/src/main/scala/Preparator.scala @@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD class Preparator extends PPreparator[TrainingData, PreparedData] { + override def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { new PreparedData(ratings = trainingData.ratings) } diff --git a/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/ALSAlgorithm.scala b/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/ALSAlgorithm.scala index 234aa0d33a..a555b696c9 100644 --- a/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/ALSAlgorithm.scala +++ b/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/ALSAlgorithm.scala @@ -48,6 +48,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) s"To remedy it, set lower numIterations or checkpoint parameters.") } + override def train(sc: SparkContext, data: PreparedData): ALSModel = { // MLLib ALS cannot handle empty training data. 
require(!data.ratings.take(1).isEmpty, @@ -93,6 +94,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) itemStringIntMap = itemStringIntMap) } + override def predict(model: ALSModel, query: Query): PredictedResult = { // Convert String ID to Int index for Mllib model.userStringIntMap.get(query.user).map { userInt => diff --git a/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/ALSModel.scala b/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/ALSModel.scala index 898858dbc5..164781c629 100644 --- a/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/ALSModel.scala +++ b/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/ALSModel.scala @@ -39,6 +39,7 @@ class ALSModel( extends MatrixFactorizationModel(rank, userFeatures, productFeatures) with PersistentModel[ALSAlgorithmParams] { + override def save(id: String, params: ALSAlgorithmParams, sc: SparkContext): Boolean = { diff --git a/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/Evaluation.scala b/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/Evaluation.scala index a6654967d9..3f1dc7859b 100644 --- a/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/Evaluation.scala +++ b/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/Evaluation.scala @@ -35,6 +35,7 @@ case class PrecisionAtK(k: Int, ratingThreshold: Double = 2.0) override def header = s"Precision@K (k=$k, threshold=$ratingThreshold)" + override def calculate(q: Query, p: PredictedResult, a: ActualResult): Option[Double] = { val positives: Set[String] = a.ratings.filter(_.rating >= ratingThreshold).map(_.item).toSet @@ -53,6 +54,7 @@ case class PositiveCount(ratingThreshold: Double = 2.0) extends AverageMetric[EmptyEvaluationInfo, Query, PredictedResult, ActualResult] { override def header = s"PositiveCount (threshold=$ratingThreshold)" + override def calculate(q: Query, p: PredictedResult, a: ActualResult): Double = { a.ratings.filter(_.rating >= ratingThreshold).size } diff --git a/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/Preparator.scala b/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/Preparator.scala index 6a41c47c14..af7e744aef 100644 --- a/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/Preparator.scala +++ b/examples/scala-parallel-recommendation/train-with-view-event/src/main/scala/Preparator.scala @@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD class Preparator extends PPreparator[TrainingData, PreparedData] { + override def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { new PreparedData(ratings = trainingData.ratings) } diff --git a/examples/scala-parallel-similarproduct/README.md b/examples/scala-parallel-similarproduct/README.md index a1da18b4cf..404bdac293 100644 --- a/examples/scala-parallel-similarproduct/README.md +++ b/examples/scala-parallel-similarproduct/README.md @@ -17,4 +17,4 @@ limitations under the License. This is based on Similar Product Template v0.14.0. 
-Please refer to http://predictionio.apache.org/templates/similarproduct/how-to/ +Please refer to https://predictionio.apache.org/templates/similarproduct/how-to/ diff --git a/examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/ALSAlgorithm.scala b/examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/ALSAlgorithm.scala index 64d570c3de..618c99c595 100644 --- a/examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/ALSAlgorithm.scala +++ b/examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/ALSAlgorithm.scala @@ -62,6 +62,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) @transient lazy val logger = Logger[this.type] + override def train(sc: SparkContext, data: PreparedData): ALSModel = { require(!data.viewEvents.take(1).isEmpty, s"viewEvents in PreparedData cannot be empty." + @@ -133,6 +134,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) ) } + override def predict(model: ALSModel, query: Query): PredictedResult = { val productFeatures = model.productFeatures diff --git a/examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/CooccurrenceAlgorithm.scala b/examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/CooccurrenceAlgorithm.scala index 76307e7fa1..57844b6248 100644 --- a/examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/CooccurrenceAlgorithm.scala +++ b/examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/CooccurrenceAlgorithm.scala @@ -44,6 +44,7 @@ class CooccurrenceModel( class CooccurrenceAlgorithm(val ap: CooccurrenceAlgorithmParams) extends P2LAlgorithm[PreparedData, CooccurrenceModel, Query, PredictedResult] { + override def train(sc: SparkContext, data: PreparedData): CooccurrenceModel = { val itemStringIntMap = BiMap.stringInt(data.items.keys) @@ -103,6 +104,7 @@ class CooccurrenceAlgorithm(val ap: CooccurrenceAlgorithmParams) topCooccurrences } + override def predict(model: CooccurrenceModel, query: Query): PredictedResult = { // convert items to Int index diff --git a/examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/Preparator.scala b/examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/Preparator.scala index f2e3fa5acf..c8fc56de2b 100644 --- a/examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/Preparator.scala +++ b/examples/scala-parallel-similarproduct/multi-events-multi-algos/src/main/scala/Preparator.scala @@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD class Preparator extends PPreparator[TrainingData, PreparedData] { + override def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { new PreparedData( users = trainingData.users, diff --git a/examples/scala-parallel-similarproduct/recommended-user/src/main/scala/ALSAlgorithm.scala b/examples/scala-parallel-similarproduct/recommended-user/src/main/scala/ALSAlgorithm.scala index fd84284333..67bbff8330 100644 --- a/examples/scala-parallel-similarproduct/recommended-user/src/main/scala/ALSAlgorithm.scala +++ b/examples/scala-parallel-similarproduct/recommended-user/src/main/scala/ALSAlgorithm.scala @@ -58,6 +58,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) @transient lazy val logger = Logger[this.type] + override def train(sc: SparkContext, data: PreparedData): ALSModel = { require(data.followEvents.take(1).nonEmpty, s"followEvents in PreparedData cannot be empty." 
+ @@ -125,6 +126,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) ) } + override def predict(model: ALSModel, query: Query): PredictedResult = { val similarUserFeatures = model.similarUserFeatures diff --git a/examples/scala-parallel-similarproduct/recommended-user/src/main/scala/Preparator.scala b/examples/scala-parallel-similarproduct/recommended-user/src/main/scala/Preparator.scala index a687fc1ce0..efce8b6886 100644 --- a/examples/scala-parallel-similarproduct/recommended-user/src/main/scala/Preparator.scala +++ b/examples/scala-parallel-similarproduct/recommended-user/src/main/scala/Preparator.scala @@ -24,6 +24,7 @@ import org.apache.spark.rdd.RDD class Preparator extends PPreparator[TrainingData, PreparedData] { + override def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { new PreparedData( users = trainingData.users, diff --git a/examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/ALSAlgorithm.scala b/examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/ALSAlgorithm.scala index 3bf3402231..b2ef12566e 100644 --- a/examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/ALSAlgorithm.scala +++ b/examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/ALSAlgorithm.scala @@ -62,6 +62,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) @transient lazy val logger = Logger[this.type] + override def train(sc: SparkContext, data: PreparedData): ALSModel = { require(!data.viewEvents.take(1).isEmpty, s"viewEvents in PreparedData cannot be empty." + @@ -133,6 +134,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) ) } + override def predict(model: ALSModel, query: Query): PredictedResult = { val productFeatures = model.productFeatures diff --git a/examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/CooccurrenceAlgorithm.scala b/examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/CooccurrenceAlgorithm.scala index 470d87d5c4..e58eae82e1 100644 --- a/examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/CooccurrenceAlgorithm.scala +++ b/examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/CooccurrenceAlgorithm.scala @@ -44,6 +44,7 @@ class CooccurrenceModel( class CooccurrenceAlgorithm(val ap: CooccurrenceAlgorithmParams) extends P2LAlgorithm[PreparedData, CooccurrenceModel, Query, PredictedResult] { + override def train(sc: SparkContext, data: PreparedData): CooccurrenceModel = { val itemStringIntMap = BiMap.stringInt(data.items.keys) @@ -103,6 +104,7 @@ class CooccurrenceAlgorithm(val ap: CooccurrenceAlgorithmParams) topCooccurrences } + override def predict(model: CooccurrenceModel, query: Query): PredictedResult = { // convert items to Int index diff --git a/examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/Preparator.scala b/examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/Preparator.scala index ece997b70b..56b774e1a9 100644 --- a/examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/Preparator.scala +++ b/examples/scala-parallel-similarproduct/return-item-properties/src/main/scala/Preparator.scala @@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD class Preparator extends PPreparator[TrainingData, PreparedData] { + override def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { new PreparedData( users = trainingData.users, diff --git 
a/examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/ALSAlgorithm.scala b/examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/ALSAlgorithm.scala index 50c26b5272..7e156ff0e3 100644 --- a/examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/ALSAlgorithm.scala +++ b/examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/ALSAlgorithm.scala @@ -62,6 +62,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) @transient lazy val logger = Logger[this.type] + override def train(sc: SparkContext, data: PreparedData): ALSModel = { require(!data.viewEvents.take(1).isEmpty, s"viewEvents in PreparedData cannot be empty." + @@ -129,6 +130,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) ) } + override def predict(model: ALSModel, query: Query): PredictedResult = { val productFeatures = model.productFeatures diff --git a/examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/CooccurrenceAlgorithm.scala b/examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/CooccurrenceAlgorithm.scala index 76307e7fa1..57844b6248 100644 --- a/examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/CooccurrenceAlgorithm.scala +++ b/examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/CooccurrenceAlgorithm.scala @@ -44,6 +44,7 @@ class CooccurrenceModel( class CooccurrenceAlgorithm(val ap: CooccurrenceAlgorithmParams) extends P2LAlgorithm[PreparedData, CooccurrenceModel, Query, PredictedResult] { + override def train(sc: SparkContext, data: PreparedData): CooccurrenceModel = { val itemStringIntMap = BiMap.stringInt(data.items.keys) @@ -103,6 +104,7 @@ class CooccurrenceAlgorithm(val ap: CooccurrenceAlgorithmParams) topCooccurrences } + override def predict(model: CooccurrenceModel, query: Query): PredictedResult = { // convert items to Int index diff --git a/examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/Preparator.scala b/examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/Preparator.scala index 908b9b8c18..cc94dd947a 100644 --- a/examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/Preparator.scala +++ b/examples/scala-parallel-similarproduct/rid-user-set-event/src/main/scala/Preparator.scala @@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD class Preparator extends PPreparator[TrainingData, PreparedData] { + override def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { new PreparedData( items = trainingData.items, diff --git a/examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/ALSAlgorithm.scala b/examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/ALSAlgorithm.scala index 507343e0fc..6c2f28c273 100644 --- a/examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/ALSAlgorithm.scala +++ b/examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/ALSAlgorithm.scala @@ -62,6 +62,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) @transient lazy val logger = Logger[this.type] + override def train(sc:SparkContext ,data: PreparedData): ALSModel = { require(!data.rateEvents.take(1).isEmpty, // MODIFIED s"rateEvents in PreparedData cannot be empty." 
+ // MODIFIED @@ -141,6 +142,7 @@ class ALSAlgorithm(val ap: ALSAlgorithmParams) ) } + override def predict(model: ALSModel, query: Query): PredictedResult = { val productFeatures = model.productFeatures diff --git a/examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/CooccurrenceAlgorithm.scala b/examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/CooccurrenceAlgorithm.scala index 0edc76eb9d..63ac2b7eea 100644 --- a/examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/CooccurrenceAlgorithm.scala +++ b/examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/CooccurrenceAlgorithm.scala @@ -44,6 +44,7 @@ class CooccurrenceModel( class CooccurrenceAlgorithm(val ap: CooccurrenceAlgorithmParams) extends P2LAlgorithm[PreparedData, CooccurrenceModel, Query, PredictedResult] { + override def train(sc: SparkContext, data: PreparedData): CooccurrenceModel = { val itemStringIntMap = BiMap.stringInt(data.items.keys) @@ -104,6 +105,7 @@ class CooccurrenceAlgorithm(val ap: CooccurrenceAlgorithmParams) topCooccurrences } + override def predict(model: CooccurrenceModel, query: Query): PredictedResult = { // convert items to Int index diff --git a/examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/Preparator.scala b/examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/Preparator.scala index 187e42312a..4139bce5ee 100644 --- a/examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/Preparator.scala +++ b/examples/scala-parallel-similarproduct/train-with-rate-event/src/main/scala/Preparator.scala @@ -26,6 +26,7 @@ import org.apache.spark.rdd.RDD class Preparator extends PPreparator[TrainingData, PreparedData] { + override def prepare(sc: SparkContext, trainingData: TrainingData): PreparedData = { new PreparedData( users = trainingData.users, diff --git a/project/PIOBuild.scala b/project/PIOBuild.scala index c8185a4540..615efc0998 100644 --- a/project/PIOBuild.scala +++ b/project/PIOBuild.scala @@ -27,7 +27,6 @@ object PIOBuild { val akkaVersion = settingKey[String]("The version of Akka used for building") val childrenPomExtra = settingKey[scala.xml.NodeSeq]("Extra POM data for children projects") - val elasticsearchSparkArtifact = settingKey[String]("Name of Elasticsearch-Spark artifact used for building") def binaryVersion(versionString: String): String = versionString.split('.').take(2).mkString(".") def majorVersion(versionString: String): Int = versionString.split('.')(0).toInt diff --git a/project/assembly.sbt b/project/assembly.sbt index d95475f16f..9c014713d3 100644 --- a/project/assembly.sbt +++ b/project/assembly.sbt @@ -1 +1 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.7") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.9") diff --git a/project/build.properties b/project/build.properties index 5f528e4747..1fc4b8093e 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.2.3 \ No newline at end of file +sbt.version=1.2.8 \ No newline at end of file diff --git a/project/plugins.sbt b/project/plugins.sbt index fece7e4235..0c9832c959 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -2,9 +2,9 @@ addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.9.0") addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.1.2") -addSbtPlugin("com.typesafe.sbt" % "sbt-twirl" % "1.3.15") +addSbtPlugin("com.typesafe.sbt" % "sbt-twirl" % "1.4.1") -addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "2.3") 
+addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "2.5") addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") @@ -12,6 +12,6 @@ resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositori addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.5.1") -addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.3.6") +addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.3.22") addSbtPlugin("com.typesafe.sbt" % "sbt-license-report" % "1.2.0") \ No newline at end of file diff --git a/storage/elasticsearch/build.sbt b/storage/elasticsearch/build.sbt index 3dbd3dec96..b7362f68df 100644 --- a/storage/elasticsearch/build.sbt +++ b/storage/elasticsearch/build.sbt @@ -19,16 +19,13 @@ import PIOBuild._ name := "apache-predictionio-data-elasticsearch" -elasticsearchVersion := (if (majorVersion(elasticsearchVersion.value) < 5) "5.6.9" else elasticsearchVersion.value) - libraryDependencies ++= Seq( - "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided", - "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided", + "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided", + "org.apache.spark" %% "spark-core" % sparkVersion.value % "provided", "org.elasticsearch.client" % "elasticsearch-rest-client" % elasticsearchVersion.value, - "org.elasticsearch" %% "elasticsearch-spark-20" % elasticsearchVersion.value + "org.elasticsearch" %% "elasticsearch-spark-20" % elasticsearchVersion.value exclude("org.apache.spark", "*"), - "org.elasticsearch" % "elasticsearch-hadoop-mr" % elasticsearchVersion.value, - "org.specs2" %% "specs2" % "2.3.13" % "test") + "org.specs2" %% "specs2" % "2.3.13" % "test") parallelExecution in Test := false @@ -36,12 +33,6 @@ pomExtra := childrenPomExtra.value assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false) -assemblyShadeRules in assembly := Seq( - ShadeRule.rename("org.apache.http.**" -> - "org.apache.predictionio.shaded.org.apache.http.@1").inAll, - ShadeRule.rename("org.elasticsearch.client.**" -> - "org.apache.predictionio.shaded.org.elasticsearch.client.@1").inAll) - // skip test in assembly test in assembly := {} diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala index eef83e4f68..6661257fcf 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala @@ -36,19 +36,20 @@ import org.json4s.native.Serialization.write import grizzled.slf4j.Logging /** Elasticsearch implementation of AccessKeys. 
*/ -class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: String) +class ESAccessKeys(client: RestClient, config: StorageClientConfig, metadataName: String) extends AccessKeys with Logging { implicit val formats = DefaultFormats.lossless - private val estype = "accesskeys" - private val internalIndex = index + "_" + estype - - ESUtils.createIndex(client, internalIndex) - val mappingJson = - (estype -> - ("properties" -> - ("key" -> ("type" -> "keyword")) ~ - ("events" -> ("type" -> "keyword")))) - ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) + private val metadataKey = "accesskeys" + private val index = metadataName + "_" + metadataKey + private val estype = { + val mappingJson = + ("mappings" -> + ("properties" -> + ("key" -> ("type" -> "keyword")) ~ + ("events" -> ("type" -> "keyword")))) + + ESUtils.createIndex(client, index, compact(render(mappingJson))) + } def insert(accessKey: AccessKey): Option[String] = { val key = if (accessKey.key.isEmpty) generateKey else accessKey.key @@ -63,7 +64,7 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin try { val response = client.performRequest( "GET", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { @@ -77,11 +78,11 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin e.getResponse.getStatusLine.getStatusCode match { case 404 => None case _ => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } } @@ -91,10 +92,10 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin val json = ("query" -> ("match_all" -> List.empty)) - ESUtils.getAll[AccessKey](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[AccessKey](client, index, compact(render(json))) } catch { case e: IOException => - error("Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -105,10 +106,10 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin ("query" -> ("term" -> ("appid" -> appid))) - ESUtils.getAll[AccessKey](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[AccessKey](client, index, compact(render(json))) } catch { case e: IOException => - error("Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -118,8 +119,8 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin try { val entity = new NStringEntity(write(accessKey), ContentType.APPLICATION_JSON) val response = client.performRequest( - "POST", - s"/$internalIndex/$estype/$id", + "PUT", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) @@ -128,11 +129,11 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin case "created" => case "updated" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to update $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update 
$internalIndex/$estype/$id", e) + error(s"Failed to update $index/$estype/$id", e) } } @@ -140,18 +141,18 @@ class ESAccessKeys(client: RestClient, config: StorageClientConfig, index: Strin try { val response = client.performRequest( "DELETE", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { case "deleted" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/id") + error(s"[$result] Failed to delete $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/id", e) + error(s"Failed to delete $index/$estype/$id", e) } } } diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala index 26621cff35..bb7adf2aaf 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala @@ -36,27 +36,28 @@ import org.json4s.native.Serialization.write import grizzled.slf4j.Logging /** Elasticsearch implementation of Items. */ -class ESApps(client: RestClient, config: StorageClientConfig, index: String) +class ESApps(client: RestClient, config: StorageClientConfig, metadataName: String) extends Apps with Logging { implicit val formats = DefaultFormats.lossless - private val estype = "apps" - private val seq = new ESSequences(client, config, index) - private val internalIndex = index + "_" + estype + private val seq = new ESSequences(client, config, metadataName) + private val metadataKey = "apps" + private val index = metadataName + "_" + metadataKey + private val estype = { + val mappingJson = + ("mappings" -> + ("properties" -> + ("id" -> ("type" -> "keyword")) ~ + ("name" -> ("type" -> "keyword")))) - ESUtils.createIndex(client, internalIndex) - val mappingJson = - (estype -> - ("properties" -> - ("id" -> ("type" -> "keyword")) ~ - ("name" -> ("type" -> "keyword")))) - ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) + ESUtils.createIndex(client, index, compact(render(mappingJson))) + } def insert(app: App): Option[Int] = { val id = app.id match { case v if v == 0 => @scala.annotation.tailrec def generateId: Int = { - seq.genNext(estype).toInt match { + seq.genNext(metadataKey).toInt match { case x if !get(x).isEmpty => generateId case x => x } @@ -72,7 +73,7 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) try { val response = client.performRequest( "GET", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { @@ -86,11 +87,11 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) e.getResponse.getStatusLine.getStatusCode match { case 404 => None case _ => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } } @@ -104,20 +105,17 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) val 
entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$internalIndex/$estype/_search", + s"/$index/_search", Map.empty[String, String].asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) - (jsonResponse \ "hits" \ "total").extract[Long] match { - case 0 => None - case _ => - val results = (jsonResponse \ "hits" \ "hits").extract[Seq[JValue]] - val result = (results.head \ "_source").extract[App] - Some(result) + val results = (jsonResponse \ "hits" \ "hits").extract[Seq[JValue]] + results.headOption.map { jv => + (jv \ "_source").extract[App] } } catch { case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) None } } @@ -127,10 +125,10 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) val json = ("query" -> ("match_all" -> Nil)) - ESUtils.getAll[App](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[App](client, index, compact(render(json))) } catch { case e: IOException => - error("Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -138,10 +136,10 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) def update(app: App): Unit = { val id = app.id.toString try { - val entity = new NStringEntity(write(app), ContentType.APPLICATION_JSON); + val entity = new NStringEntity(write(app), ContentType.APPLICATION_JSON) val response = client.performRequest( - "POST", - s"/$internalIndex/$estype/$id", + "PUT", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) @@ -150,11 +148,11 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) case "created" => case "updated" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to update $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to update $index/$estype/$id", e) } } @@ -162,18 +160,18 @@ class ESApps(client: RestClient, config: StorageClientConfig, index: String) try { val response = client.performRequest( "DELETE", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { case "deleted" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to delete $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/id", e) + error(s"Failed to delete $index/$estype/$id", e) } } } diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala index ac248debf4..ebba755dc0 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala @@ -35,26 +35,27 @@ import org.json4s.native.Serialization.write import grizzled.slf4j.Logging -class ESChannels(client: RestClient, config: StorageClientConfig, index: String) +class 
ESChannels(client: RestClient, config: StorageClientConfig, metadataName: String) extends Channels with Logging { implicit val formats = DefaultFormats.lossless - private val estype = "channels" - private val seq = new ESSequences(client, config, index) - private val internalIndex = index + "_" + estype + private val seq = new ESSequences(client, config, metadataName) + private val metadataKey = "channels" + private val index = metadataName + "_" + metadataKey + private val estype = { + val mappingJson = + ("mappings" -> + ("properties" -> + ("name" -> ("type" -> "keyword")))) - ESUtils.createIndex(client, internalIndex) - val mappingJson = - (estype -> - ("properties" -> - ("name" -> ("type" -> "keyword")))) - ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) + ESUtils.createIndex(client, index, compact(render(mappingJson))) + } def insert(channel: Channel): Option[Int] = { val id = channel.id match { case v if v == 0 => @scala.annotation.tailrec def generateId: Int = { - seq.genNext(estype).toInt match { + seq.genNext(metadataKey).toInt match { case x if !get(x).isEmpty => generateId case x => x } @@ -70,7 +71,7 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) try { val response = client.performRequest( "GET", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { @@ -84,11 +85,11 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) e.getResponse.getStatusLine.getStatusCode match { case 404 => None case _ => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } } @@ -99,10 +100,10 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) ("query" -> ("term" -> ("appid" -> appid))) - ESUtils.getAll[Channel](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[Channel](client, index, compact(render(json))) } catch { case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -112,8 +113,8 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) try { val entity = new NStringEntity(write(channel), ContentType.APPLICATION_JSON) val response = client.performRequest( - "POST", - s"/$internalIndex/$estype/$id", + "PUT", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava, entity) val json = parse(EntityUtils.toString(response.getEntity)) @@ -122,12 +123,12 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) case "created" => true case "updated" => true case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to update $index/$estype/$id") false } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to update $index/$estype/$id", e) false } } @@ -136,18 +137,18 @@ class ESChannels(client: RestClient, config: StorageClientConfig, index: String) try { val response = client.performRequest( "DELETE", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava) val jsonResponse = 
parse(EntityUtils.toString(response.getEntity)) val result = (jsonResponse \ "result").extract[String] result match { case "deleted" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to delete $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to delete $index/$estype/$id", e) } } } diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala index 96f8a6720c..850bdb325e 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala @@ -36,30 +36,31 @@ import org.json4s.native.Serialization.write import grizzled.slf4j.Logging -class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: String) +class ESEngineInstances(client: RestClient, config: StorageClientConfig, metadataName: String) extends EngineInstances with Logging { implicit val formats = DefaultFormats + new EngineInstanceSerializer - private val estype = "engine_instances" - private val internalIndex = index + "_" + estype - - ESUtils.createIndex(client, internalIndex) - val mappingJson = - (estype -> - ("properties" -> - ("status" -> ("type" -> "keyword")) ~ - ("startTime" -> ("type" -> "date")) ~ - ("endTime" -> ("type" -> "date")) ~ - ("engineId" -> ("type" -> "keyword")) ~ - ("engineVersion" -> ("type" -> "keyword")) ~ - ("engineVariant" -> ("type" -> "keyword")) ~ - ("engineFactory" -> ("type" -> "keyword")) ~ - ("batch" -> ("type" -> "keyword")) ~ - ("dataSourceParams" -> ("type" -> "keyword")) ~ - ("preparatorParams" -> ("type" -> "keyword")) ~ - ("algorithmsParams" -> ("type" -> "keyword")) ~ - ("servingParams" -> ("type" -> "keyword")) + private val metadataKey = "engine_instances" + private val index = metadataName + "_" + metadataKey + private val estype = { + val mappingJson = + ("mappings" -> + ("properties" -> + ("status" -> ("type" -> "keyword")) ~ + ("startTime" -> ("type" -> "date")) ~ + ("endTime" -> ("type" -> "date")) ~ + ("engineId" -> ("type" -> "keyword")) ~ + ("engineVersion" -> ("type" -> "keyword")) ~ + ("engineVariant" -> ("type" -> "keyword")) ~ + ("engineFactory" -> ("type" -> "keyword")) ~ + ("batch" -> ("type" -> "keyword")) ~ + ("dataSourceParams" -> ("type" -> "keyword")) ~ + ("preparatorParams" -> ("type" -> "keyword")) ~ + ("algorithmsParams" -> ("type" -> "keyword")) ~ + ("servingParams" -> ("type" -> "keyword")) )) - ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) + + ESUtils.createIndex(client, index, compact(render(mappingJson))) + } def insert(i: EngineInstance): String = { val id = i.id match { @@ -84,7 +85,7 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: val entity = new NStringEntity("{}", ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$internalIndex/$estype/", + s"/$index/$estype", Map("refresh" -> "true").asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) @@ -93,12 +94,12 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: case "created" => Some((jsonResponse \ "_id").extract[String]) case _ => - error(s"[$result] 
Failed to create $internalIndex/$estype") + error(s"[$result] Failed to create $index/$estype") None } } catch { case e: IOException => - error(s"Failed to create $internalIndex/$estype", e) + error(s"Failed to create $index/$estype", e) None } } @@ -107,7 +108,7 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: try { val response = client.performRequest( "GET", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { @@ -121,11 +122,11 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: e.getResponse.getStatusLine.getStatusCode match { case 404 => None case _ => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } } @@ -135,10 +136,10 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: val json = ("query" -> ("match_all" -> List.empty)) - ESUtils.getAll[EngineInstance](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[EngineInstance](client, index, compact(render(json))) } catch { case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -163,10 +164,10 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: ("sort" -> List( ("startTime" -> ("order" -> "desc")))) - ESUtils.getAll[EngineInstance](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[EngineInstance](client, index, compact(render(json))) } catch { case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -185,8 +186,8 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: try { val entity = new NStringEntity(write(i), ContentType.APPLICATION_JSON) val response = client.performRequest( - "POST", - s"/$internalIndex/$estype/$id", + "PUT", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) @@ -195,11 +196,11 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: case "created" => case "updated" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to update $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to update $index/$estype/$id", e) } } @@ -207,18 +208,18 @@ class ESEngineInstances(client: RestClient, config: StorageClientConfig, index: try { val response = client.performRequest( "DELETE", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { case "deleted" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to delete $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to delete $index/$estype/$id", e) } } } diff --git 
a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala index 0025950d03..93c3e33c8c 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala @@ -36,34 +36,35 @@ import org.json4s.native.Serialization.write import grizzled.slf4j.Logging -class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, index: String) +class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, metadataName: String) extends EvaluationInstances with Logging { implicit val formats = DefaultFormats + new EvaluationInstanceSerializer - private val estype = "evaluation_instances" - private val seq = new ESSequences(client, config, index) - private val internalIndex = index + "_" + estype + private val seq = new ESSequences(client, config, metadataName) + private val metadataKey = "evaluation_instances" + private val index = metadataName + "_" + metadataKey + private val estype = { + val mappingJson = + ("mappings" -> + ("properties" -> + ("status" -> ("type" -> "keyword")) ~ + ("startTime" -> ("type" -> "date")) ~ + ("endTime" -> ("type" -> "date")) ~ + ("evaluationClass" -> ("type" -> "keyword")) ~ + ("engineParamsGeneratorClass" -> ("type" -> "keyword")) ~ + ("batch" -> ("type" -> "keyword")) ~ + ("evaluatorResults" -> ("type" -> "text")) ~ + ("evaluatorResultsHTML" -> ("enabled" -> false)) ~ + ("evaluatorResultsJSON" -> ("enabled" -> false)))) - ESUtils.createIndex(client, internalIndex) - val mappingJson = - (estype -> - ("properties" -> - ("status" -> ("type" -> "keyword")) ~ - ("startTime" -> ("type" -> "date")) ~ - ("endTime" -> ("type" -> "date")) ~ - ("evaluationClass" -> ("type" -> "keyword")) ~ - ("engineParamsGeneratorClass" -> ("type" -> "keyword")) ~ - ("batch" -> ("type" -> "keyword")) ~ - ("evaluatorResults" -> ("type" -> "text")) ~ - ("evaluatorResultsHTML" -> ("enabled" -> false)) ~ - ("evaluatorResultsJSON" -> ("enabled" -> false)))) - ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) + ESUtils.createIndex(client, index, compact(render(mappingJson))) + } def insert(i: EvaluationInstance): String = { val id = i.id match { case v if v.isEmpty => @scala.annotation.tailrec def generateId: String = { - seq.genNext(estype).toString match { + seq.genNext(metadataKey).toString match { case x if !get(x).isEmpty => generateId case x => x } @@ -79,7 +80,7 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind try { val response = client.performRequest( "GET", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map.empty[String, String].asJava) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "found").extract[Boolean] match { @@ -93,11 +94,11 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind e.getResponse.getStatusLine.getStatusCode match { case 404 => None case _ => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } case e: IOException => - error(s"Failed to access to /$internalIndex/$estype/$id", e) + error(s"Failed to access to /$index/$estype/$id", e) None } } @@ -107,10 +108,10 @@ class 
ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind val json = ("query" -> ("match_all" -> List.empty)) - ESUtils.getAll[EvaluationInstance](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[EvaluationInstance](client, index, compact(render(json))) } catch { case e: IOException => - error("Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -124,10 +125,10 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind ("sort" -> ("startTime" -> ("order" -> "desc"))) - ESUtils.getAll[EvaluationInstance](client, internalIndex, estype, compact(render(json))) + ESUtils.getAll[EvaluationInstance](client, index, compact(render(json))) } catch { case e: IOException => - error("Failed to access to /$internalIndex/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) Nil } } @@ -137,8 +138,8 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind try { val entity = new NStringEntity(write(i), ContentType.APPLICATION_JSON) val response = client.performRequest( - "POST", - s"/$internalIndex/$estype/$id", + "PUT", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava, entity) val json = parse(EntityUtils.toString(response.getEntity)) @@ -147,11 +148,11 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind case "created" => case "updated" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to update $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to update $index/$estype/$id", e) } } @@ -159,18 +160,18 @@ class ESEvaluationInstances(client: RestClient, config: StorageClientConfig, ind try { val response = client.performRequest( "DELETE", - s"/$internalIndex/$estype/$id", + s"/$index/$estype/$id", Map("refresh" -> "true").asJava) val json = parse(EntityUtils.toString(response.getEntity)) val result = (json \ "result").extract[String] result match { case "deleted" => case _ => - error(s"[$result] Failed to update $internalIndex/$estype/$id") + error(s"[$result] Failed to delete $index/$estype/$id") } } catch { case e: IOException => - error(s"Failed to update $internalIndex/$estype/$id", e) + error(s"Failed to delete $index/$estype/$id", e) } } } diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala index 708d3d33b3..8cd14344f6 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESLEvents.scala @@ -28,7 +28,7 @@ import org.apache.http.util.EntityUtils import org.apache.predictionio.data.storage.Event import org.apache.predictionio.data.storage.LEvents import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.client.{ResponseException, RestClient} +import org.elasticsearch.client.RestClient import org.joda.time.DateTime import org.json4s._ import org.json4s.JsonDSL._ @@ -38,11 +38,11 @@ import org.json4s.ext.JodaTimeSerializers import grizzled.slf4j.Logging import org.apache.http.message.BasicHeader -class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseIndex: String) +class 
ESLEvents(val client: RestClient, config: StorageClientConfig, val eventdataName: String) extends LEvents with Logging { implicit val formats = DefaultFormats.lossless ++ JodaTimeSerializers.all - def getEsType(appId: Int, channelId: Option[Int] = None): String = { + def eventdataKey(appId: Int, channelId: Option[Int] = None): String = { channelId.map { ch => s"${appId}_${ch}" }.getOrElse { @@ -51,11 +51,9 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd } override def init(appId: Int, channelId: Option[Int] = None): Boolean = { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype - ESUtils.createIndex(client, index) + val index = eventdataName + "_" + eventdataKey(appId, channelId) val json = - (estype -> + ("mappings" -> ("properties" -> ("name" -> ("type" -> "keyword")) ~ ("eventId" -> ("type" -> "keyword")) ~ @@ -69,31 +67,26 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd ("tags" -> ("type" -> "keyword")) ~ ("prId" -> ("type" -> "keyword")) ~ ("creationTime" -> ("type" -> "date")))) - ESUtils.createMapping(client, index, estype, compact(render(json))) + ESUtils.createIndex(client, index, compact(render(json))) true } override def remove(appId: Int, channelId: Option[Int] = None): Boolean = { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) try { - val json = - ("query" -> - ("match_all" -> List.empty)) - val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) client.performRequest( - "POST", - s"/$index/$estype/_delete_by_query", - Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, - entity).getStatusLine.getStatusCode match { - case 200 => true - case _ => - error(s"Failed to remove $index/$estype") - false - } + "DELETE", + s"/$index", + Map.empty[String, String].asJava + ).getStatusLine.getStatusCode match { + case 200 => true + case _ => + error(s"Failed to remove $index") + false + } } catch { case e: Exception => - error(s"Failed to remove $index/$estype", e) + error(s"Failed to remove $index", e) false } } @@ -105,8 +98,8 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext): Future[String] = { Future { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) + val estype = ESUtils.esType(client, index) try { val id = event.eventId.getOrElse { ESEventsUtil.getBase64UUID @@ -125,7 +118,7 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd ("properties" -> write(event.properties.toJObject)) val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) val response = client.performRequest( - "POST", + "PUT", s"/$index/$estype/$id", Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, entity) @@ -133,7 +126,6 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val result = (jsonResponse \ "result").extract[String] result match { case "created" => id - case "updated" => id case _ => error(s"[$result] Failed to update $index/$estype/$id") "" @@ -151,8 +143,8 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext): Future[Seq[String]] = { Future { - val estype = getEsType(appId, 
channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) + val estype = ESUtils.esType(client, index) try { val ids = events.map { event => event.eventId.getOrElse(ESEventsUtil.getBase64UUID) @@ -160,7 +152,7 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val json = events.zip(ids).map { case (event, id) => val commandJson = - ("index" -> ( + ("create" -> ( ("_index" -> index) ~ ("_type" -> estype) ~ ("_id" -> id) @@ -195,12 +187,11 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val items = (responseJson \ "items").asInstanceOf[JArray] items.arr.map { case value: JObject => - val result = (value \ "index" \ "result").extract[String] - val id = (value \ "index" \ "_id").extract[String] + val result = (value \ "create" \ "result").extract[String] + val id = (value \ "create" \ "_id").extract[String] result match { case "created" => id - case "updated" => id case _ => error(s"[$result] Failed to update $index/$estype/$id") "" @@ -214,37 +205,12 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd } } - private def exists(client: RestClient, estype: String, id: Int): Boolean = { - val index = baseIndex + "_" + estype - try { - client.performRequest( - "GET", - s"/$index/$estype/$id", - Map.empty[String, String].asJava).getStatusLine.getStatusCode match { - case 200 => true - case _ => false - } - } catch { - case e: ResponseException => - e.getResponse.getStatusLine.getStatusCode match { - case 404 => false - case _ => - error(s"Failed to access to /$index/$estype/$id", e) - false - } - case e: IOException => - error(s"Failed to access to $index/$estype/$id", e) - false - } - } - override def futureGet( eventId: String, appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext): Future[Option[Event]] = { Future { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) try { val json = ("query" -> @@ -253,20 +219,17 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$index/$estype/_search", + s"/$index/_search", Map.empty[String, String].asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) - (jsonResponse \ "hits" \ "total").extract[Long] match { - case 0 => None - case _ => - val results = (jsonResponse \ "hits" \ "hits").extract[Seq[JValue]] - val result = (results.head \ "_source").extract[Event] - Some(result) + val results = (jsonResponse \ "hits" \ "hits").extract[Seq[JValue]] + results.headOption.map { jv => + (jv \ "_source").extract[Event] } } catch { case e: IOException => - error("Failed to access to /$index/$estype/_search", e) + error(s"Failed to access to /$index/_search", e) None } } @@ -277,8 +240,7 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd appId: Int, channelId: Option[Int])(implicit ec: ExecutionContext): Future[Boolean] = { Future { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) try { val json = ("query" -> @@ -287,14 +249,14 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd val entity = new NStringEntity(compact(render(json)), 
ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$index/$estype/_delete_by_query", + s"/$index/_delete_by_query", Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) (jsonResponse \ "deleted").extract[Int] > 0 } catch { case e: IOException => - error(s"Failed to delete $index/$estype:$eventId", e) + error(s"Failed to delete $index:$eventId", e) false } } @@ -314,15 +276,14 @@ class ESLEvents(val client: RestClient, config: StorageClientConfig, val baseInd reversed: Option[Boolean] = None) (implicit ec: ExecutionContext): Future[Iterator[Event]] = { Future { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) try { val query = ESUtils.createEventQuery( startTime, untilTime, entityType, entityId, eventNames, targetEntityType, targetEntityId, reversed) limit.getOrElse(20) match { - case -1 => ESUtils.getEventAll(client, index, estype, query).toIterator - case size => ESUtils.getEvents(client, index, estype, query, size).toIterator + case -1 => ESUtils.getEventAll(client, index, query).toIterator + case size => ESUtils.getEvents(client, index, query, size).toIterator } } catch { case e: IOException => diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala index a86d378331..f54456f2bf 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESPEvents.scala @@ -41,11 +41,11 @@ import org.json4s.native.JsonMethods._ import org.json4s.ext.JodaTimeSerializers -class ESPEvents(client: RestClient, config: StorageClientConfig, baseIndex: String) +class ESPEvents(client: RestClient, config: StorageClientConfig, eventdataName: String) extends PEvents { implicit val formats = DefaultFormats.lossless ++ JodaTimeSerializers.all - def getEsType(appId: Int, channelId: Option[Int] = None): String = { + def eventdataKey(appId: Int, channelId: Option[Int] = None): String = { channelId.map { ch => s"${appId}_${ch}" }.getOrElse { @@ -77,10 +77,9 @@ class ESPEvents(client: RestClient, config: StorageClientConfig, baseIndex: Stri startTime, untilTime, entityType, entityId, eventNames, targetEntityType, targetEntityId, None) - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) val conf = new Configuration() - conf.set("es.resource", s"$index/$estype") + conf.set("es.resource", index) conf.set("es.query", query) conf.set("es.nodes", getESNodes()) @@ -97,8 +96,8 @@ class ESPEvents(client: RestClient, config: StorageClientConfig, baseIndex: Stri override def write( events: RDD[Event], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) + val estype = ESUtils.esType(client, index) val conf = Map("es.resource" -> s"$index/$estype", "es.nodes" -> getESNodes()) events.map { event => ESEventsUtil.eventToPut(event, appId) @@ -108,8 +107,7 @@ class ESPEvents(client: RestClient, config: StorageClientConfig, baseIndex: Stri override def delete( 
eventIds: RDD[String], appId: Int, channelId: Option[Int])(sc: SparkContext): Unit = { - val estype = getEsType(appId, channelId) - val index = baseIndex + "_" + estype + val index = eventdataName + "_" + eventdataKey(appId, channelId) eventIds.foreachPartition { iter => iter.foreach { eventId => try { @@ -120,19 +118,17 @@ class ESPEvents(client: RestClient, config: StorageClientConfig, baseIndex: Stri val entity = new NStringEntity(compact(render(json)), ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$index/$estype/_delete_by_query", + s"/$index/_delete_by_query", Map("refresh" -> ESUtils.getEventDataRefresh(config)).asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) - val result = (jsonResponse \ "result").extract[String] - result match { - case "deleted" => - case _ => - logger.error(s"[$result] Failed to update $index/$estype:$eventId") + if ((jsonResponse \ "deleted").extract[Int] == 0) { + logger.warn("The number of documents that were successfully deleted is 0. " + + s"$index:$eventId") } } catch { case e: IOException => - logger.error(s"Failed to update $index/$estype:$eventId", e) + logger.error(s"Failed to update $index:$eventId", e) } } } diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala index ade0f40ce9..0fb1a73a76 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala @@ -34,24 +34,25 @@ import org.json4s.native.Serialization.write import grizzled.slf4j.Logging -class ESSequences(client: RestClient, config: StorageClientConfig, index: String) extends Logging { +class ESSequences(client: RestClient, config: StorageClientConfig, metadataName: String) extends Logging { implicit val formats = DefaultFormats - private val estype = "sequences" - private val internalIndex = index + "_" + estype + private val metadataKey = "sequences" + private val index = metadataName + "_" + metadataKey + private val estype = { + val mappingJson = + ("mappings" -> + ("properties" -> + ("n" -> ("enabled" -> false)))) - ESUtils.createIndex(client, internalIndex) - val mappingJson = - (estype -> - ("properties" -> - ("n" -> ("enabled" -> false)))) - ESUtils.createMapping(client, internalIndex, estype, compact(render(mappingJson))) + ESUtils.createIndex(client, index, compact(render(mappingJson))) + } def genNext(name: String): Long = { try { val entity = new NStringEntity(write("n" -> name), ContentType.APPLICATION_JSON) val response = client.performRequest( - "POST", - s"/$internalIndex/$estype/$name", + "PUT", + s"/$index/$estype/$name", Map("refresh" -> "false").asJava, entity) val jsonResponse = parse(EntityUtils.toString(response.getEntity)) @@ -62,11 +63,11 @@ class ESSequences(client: RestClient, config: StorageClientConfig, index: String case "updated" => (jsonResponse \ "_version").extract[Long] case _ => - throw new IllegalStateException(s"[$result] Failed to update $internalIndex/$estype/$name") + throw new IllegalStateException(s"[$result] Failed to update $index/$estype/$name") } } catch { case e: IOException => - throw new StorageClientException(s"Failed to update $internalIndex/$estype/$name", e) + throw new StorageClientException(s"Failed to update $index/$estype/$name", e) 
} } } diff --git a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala index 93d5d94912..80079e319d 100644 --- a/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala +++ b/storage/elasticsearch/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala @@ -18,7 +18,6 @@ package org.apache.predictionio.data.storage.elasticsearch import scala.collection.JavaConversions._ -import scala.collection.JavaConverters._ import org.apache.http.entity.ContentType import org.apache.http.nio.entity.NStringEntity @@ -82,24 +81,22 @@ object ESUtils { def getEvents( client: RestClient, index: String, - estype: String, query: String, size: Int)( implicit formats: Formats): Seq[Event] = { - getDocList(client, index, estype, query, size).map(x => toEvent(x)) + getDocList(client, index, query, size).map(x => toEvent(x)) } def getDocList( client: RestClient, index: String, - estype: String, query: String, size: Int)( implicit formats: Formats): Seq[JValue] = { val entity = new NStringEntity(query, ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$index/$estype/_search", + s"/$index/_search", Map("size" -> s"${size}"), entity) val responseJValue = parse(EntityUtils.toString(response.getEntity)) @@ -110,25 +107,22 @@ object ESUtils { def getAll[T: Manifest]( client: RestClient, index: String, - estype: String, query: String)( implicit formats: Formats): Seq[T] = { - getDocAll(client, index, estype, query).map(x => x.extract[T]) + getDocAll(client, index, query).map(x => x.extract[T]) } def getEventAll( client: RestClient, index: String, - estype: String, query: String)( implicit formats: Formats): Seq[Event] = { - getDocAll(client, index, estype, query).map(x => toEvent(x)) + getDocAll(client, index, query).map(x => toEvent(x)) } def getDocAll( client: RestClient, index: String, - estype: String, query: String)( implicit formats: Formats): Seq[JValue] = { @@ -153,7 +147,7 @@ object ESUtils { val entity = new NStringEntity(query, ContentType.APPLICATION_JSON) val response = client.performRequest( "POST", - s"/$index/$estype/_search", + s"/$index/_search", Map("scroll" -> scrollLife), entity) val responseJValue = parse(EntityUtils.toString(response.getEntity)) @@ -164,42 +158,52 @@ object ESUtils { def createIndex( client: RestClient, - index: String): Unit = { + index: String, + json: String)( + implicit formats: Formats): String = { client.performRequest( "HEAD", s"/$index", - Map.empty[String, String].asJava).getStatusLine.getStatusCode match { + Map("include_type_name" -> "false")).getStatusLine.getStatusCode match { case 404 => + val entity = new NStringEntity(json, ContentType.APPLICATION_JSON) client.performRequest( "PUT", s"/$index", - Map.empty[String, String].asJava) + Map("include_type_name" -> "false"), + entity).getStatusLine.getStatusCode match { + case 200 => + "_doc" + case _ => + throw new IllegalStateException(s"/$index is invalid: $json") + } case 200 => + esType(client, index) case _ => - throw new IllegalStateException(s"/$index is invalid.") + throw new IllegalStateException(s"/$index is invalid: $json") } } - def createMapping( + // We cannot have several types within a single index as of ES 6.0, so + // continue to add or update a document under the current type. This code is + // a step towards ES 7.0 support (removal of mapping types). 
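
The comment above captures the constraint driving most of this patch: from Elasticsearch 6.0 an index can hold only a single mapping type, so mappings are now supplied in the index-creation body and the revised createIndex returns the type under which documents should be addressed (the legacy type of a pre-existing index, or "_doc"); the esType helper added just below performs that lookup. A minimal sketch of how a caller might exercise the pair, assuming a node at localhost:9200; the index name, field names, and document values are illustrative, not taken from the patch:

    import scala.collection.JavaConverters._
    import org.apache.http.HttpHost
    import org.apache.http.entity.ContentType
    import org.apache.http.nio.entity.NStringEntity
    import org.apache.http.util.EntityUtils
    import org.elasticsearch.client.RestClient
    import org.json4s._
    import org.json4s.JsonDSL._
    import org.json4s.native.JsonMethods._
    import org.apache.predictionio.data.storage.elasticsearch.ESUtils

    object SingleTypeIndexSketch extends App {
      implicit val formats: Formats = DefaultFormats

      val client = RestClient.builder(new HttpHost("localhost", 9200)).build()

      // Mappings go into the index-creation body ("mappings" -> "properties"),
      // instead of being PUT per type after the index exists.
      val mappingJson =
        ("mappings" ->
          ("properties" ->
            ("id" -> ("type" -> "keyword")) ~
            ("name" -> ("type" -> "keyword"))))

      // Hypothetical index name following the metadataName + "_" + metadataKey convention.
      val index = "pio_meta_apps"

      // createIndex returns the single type to use for this index ("_doc" for a new index,
      // or the legacy type if the index predates the upgrade).
      val estype = ESUtils.createIndex(client, index, compact(render(mappingJson)))

      // Documents are addressed by explicit id with PUT.
      val doc = compact(render(("id" -> "1") ~ ("name" -> "my_app")))
      val entity = new NStringEntity(doc, ContentType.APPLICATION_JSON)
      val response = client.performRequest(
        "PUT",
        s"/$index/$estype/1",
        Map("refresh" -> "true").asJava,
        entity)
      println(EntityUtils.toString(response.getEntity))

      client.close()
    }

Addressing documents by explicit id with PUT, as the patch does throughout the metadata stores, keeps writes idempotent: retrying the same request updates the existing document rather than creating a duplicate.
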
+ def esType( client: RestClient, - index: String, - estype: String, - json: String): Unit = { - client.performRequest( - "HEAD", - s"/$index/_mapping/$estype", - Map.empty[String, String].asJava).getStatusLine.getStatusCode match { - case 404 => - val entity = new NStringEntity(json, ContentType.APPLICATION_JSON) - client.performRequest( - "PUT", - s"/$index/_mapping/$estype", - Map.empty[String, String].asJava, - entity) - case 200 => - case _ => - throw new IllegalStateException(s"/$index/$estype is invalid: $json") - } + index: String)( + implicit formats: Formats): String = { + val response = client.performRequest( + "GET", + s"/$index", + Map("include_type_name" -> "true")) + response.getStatusLine.getStatusCode match { + case 200 => + (parse(EntityUtils.toString(response.getEntity)) \ index \ "mappings") + .extract[JObject].values.collectFirst { + case (name, _) if name != "_doc" && name != "properties" => name + }.getOrElse("_doc") + case _ => + throw new IllegalStateException(s"/$index is invalid.") + } } def formatUTCDateTime(dt: DateTime): String = { diff --git a/storage/elasticsearch1/.gitignore b/storage/elasticsearch1/.gitignore deleted file mode 100644 index ae3c172604..0000000000 --- a/storage/elasticsearch1/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/bin/ diff --git a/storage/elasticsearch1/build.sbt b/storage/elasticsearch1/build.sbt deleted file mode 100644 index 5e72f91de1..0000000000 --- a/storage/elasticsearch1/build.sbt +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import PIOBuild._ - -name := "apache-predictionio-data-elasticsearch1" - -libraryDependencies ++= Seq( - "org.apache.predictionio" %% "apache-predictionio-core" % version.value % "provided", - "org.elasticsearch" % "elasticsearch" % elasticsearchVersion.value, - "org.scalatest" %% "scalatest" % "2.1.7" % "test") - -parallelExecution in Test := false - -pomExtra := childrenPomExtra.value - -assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false) - -// skip test in assembly -test in assembly := {} - -assemblyOutputPath in assembly := baseDirectory.value.getAbsoluteFile.getParentFile.getParentFile / - "assembly" / "src" / "universal" / "lib" / "spark" / - s"pio-data-elasticsearch1-assembly-${version.value}.jar" diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala deleted file mode 100644 index 5e3abe2716..0000000000 --- a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESAccessKeys.scala +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package org.apache.predictionio.data.storage.elasticsearch - -import grizzled.slf4j.Logging -import org.apache.predictionio.data.storage.StorageClientConfig -import org.apache.predictionio.data.storage.AccessKey -import org.apache.predictionio.data.storage.AccessKeys -import org.elasticsearch.ElasticsearchException -import org.elasticsearch.client.Client -import org.elasticsearch.index.query.FilterBuilders._ -import org.json4s.JsonDSL._ -import org.json4s._ -import org.json4s.native.JsonMethods._ -import org.json4s.native.Serialization.read -import org.json4s.native.Serialization.write - -import scala.util.Random - -/** Elasticsearch implementation of AccessKeys. */ -class ESAccessKeys(client: Client, config: StorageClientConfig, index: String) - extends AccessKeys with Logging { - implicit val formats = DefaultFormats.lossless - private val estype = "accesskeys" - - val indices = client.admin.indices - val indexExistResponse = indices.prepareExists(index).get - if (!indexExistResponse.isExists) { - indices.prepareCreate(index).get - } - val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get - if (!typeExistResponse.isExists) { - val json = - (estype -> - ("properties" -> - ("key" -> ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("events" -> ("type" -> "string") ~ ("index" -> "not_analyzed")))) - indices.preparePutMapping(index).setType(estype). 
- setSource(compact(render(json))).get - } - - def insert(accessKey: AccessKey): Option[String] = { - val key = if (accessKey.key.isEmpty) generateKey else accessKey.key - update(accessKey.copy(key = key)) - Some(key) - } - - def get(key: String): Option[AccessKey] = { - try { - val response = client.prepareGet( - index, - estype, - key).get() - Some(read[AccessKey](response.getSourceAsString)) - } catch { - case e: ElasticsearchException => - error(e.getMessage) - None - case e: NullPointerException => None - } - } - - def getAll(): Seq[AccessKey] = { - try { - val builder = client.prepareSearch(index).setTypes(estype) - ESUtils.getAll[AccessKey](client, builder) - } catch { - case e: ElasticsearchException => - error(e.getMessage) - Seq[AccessKey]() - } - } - - def getByAppid(appid: Int): Seq[AccessKey] = { - try { - val builder = client.prepareSearch(index).setTypes(estype). - setPostFilter(termFilter("appid", appid)) - ESUtils.getAll[AccessKey](client, builder) - } catch { - case e: ElasticsearchException => - error(e.getMessage) - Nil - } - } - - def update(accessKey: AccessKey): Unit = { - try { - client.prepareIndex(index, estype, accessKey.key).setSource(write(accessKey)).get() - } catch { - case e: ElasticsearchException => - error(e.getMessage) - } - } - - def delete(key: String): Unit = { - try { - client.prepareDelete(index, estype, key).get - } catch { - case e: ElasticsearchException => - error(e.getMessage) - } - } -} diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala deleted file mode 100644 index 270af0e8b6..0000000000 --- a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESApps.scala +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package org.apache.predictionio.data.storage.elasticsearch - -import grizzled.slf4j.Logging -import org.apache.predictionio.data.storage.StorageClientConfig -import org.apache.predictionio.data.storage.App -import org.apache.predictionio.data.storage.Apps -import org.elasticsearch.ElasticsearchException -import org.elasticsearch.client.Client -import org.elasticsearch.index.query.FilterBuilders._ -import org.json4s.JsonDSL._ -import org.json4s._ -import org.json4s.native.JsonMethods._ -import org.json4s.native.Serialization.read -import org.json4s.native.Serialization.write - -/** Elasticsearch implementation of Items. 
*/ -class ESApps(client: Client, config: StorageClientConfig, index: String) - extends Apps with Logging { - implicit val formats = DefaultFormats.lossless - private val estype = "apps" - private val seq = new ESSequences(client, config, index) - - val indices = client.admin.indices - val indexExistResponse = indices.prepareExists(index).get - if (!indexExistResponse.isExists) { - indices.prepareCreate(index).get - } - val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get - if (!typeExistResponse.isExists) { - val json = - (estype -> - ("properties" -> - ("name" -> ("type" -> "string") ~ ("index" -> "not_analyzed")))) - indices.preparePutMapping(index).setType(estype). - setSource(compact(render(json))).get - } - - def insert(app: App): Option[Int] = { - val id = - if (app.id == 0) { - var roll = seq.genNext("apps") - while (!get(roll).isEmpty) roll = seq.genNext("apps") - roll - } - else app.id - val realapp = app.copy(id = id) - update(realapp) - Some(id) - } - - def get(id: Int): Option[App] = { - try { - val response = client.prepareGet( - index, - estype, - id.toString).get() - Some(read[App](response.getSourceAsString)) - } catch { - case e: ElasticsearchException => - error(e.getMessage) - None - case e: NullPointerException => None - } - } - - def getByName(name: String): Option[App] = { - try { - val response = client.prepareSearch(index).setTypes(estype). - setPostFilter(termFilter("name", name)).get - val hits = response.getHits().hits() - if (hits.size > 0) { - Some(read[App](hits.head.getSourceAsString)) - } else { - None - } - } catch { - case e: ElasticsearchException => - error(e.getMessage) - None - } - } - - def getAll(): Seq[App] = { - try { - val builder = client.prepareSearch(index).setTypes(estype) - ESUtils.getAll[App](client, builder) - } catch { - case e: ElasticsearchException => - error(e.getMessage) - Nil - } - } - - def update(app: App): Unit = { - try { - val response = client.prepareIndex(index, estype, app.id.toString). - setSource(write(app)).get() - } catch { - case e: ElasticsearchException => - error(e.getMessage) - } - } - - def delete(id: Int): Unit = { - try { - client.prepareDelete(index, estype, id.toString).get - } catch { - case e: ElasticsearchException => - error(e.getMessage) - } - } -} diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala deleted file mode 100644 index 52697fdee1..0000000000 --- a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESChannels.scala +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package org.apache.predictionio.data.storage.elasticsearch - -import grizzled.slf4j.Logging -import org.apache.predictionio.data.storage.Channel -import org.apache.predictionio.data.storage.Channels -import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.ElasticsearchException -import org.elasticsearch.client.Client -import org.elasticsearch.index.query.FilterBuilders.termFilter -import org.json4s.DefaultFormats -import org.json4s.JsonDSL._ -import org.json4s.native.JsonMethods._ -import org.json4s.native.Serialization.read -import org.json4s.native.Serialization.write - -class ESChannels(client: Client, config: StorageClientConfig, index: String) - extends Channels with Logging { - - implicit val formats = DefaultFormats.lossless - private val estype = "channels" - private val seq = new ESSequences(client, config, index) - private val seqName = "channels" - - val indices = client.admin.indices - val indexExistResponse = indices.prepareExists(index).get - if (!indexExistResponse.isExists) { - indices.prepareCreate(index).get - } - val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get - if (!typeExistResponse.isExists) { - val json = - (estype -> - ("properties" -> - ("name" -> ("type" -> "string") ~ ("index" -> "not_analyzed")))) - indices.preparePutMapping(index).setType(estype). - setSource(compact(render(json))).get - } - - def insert(channel: Channel): Option[Int] = { - val id = - if (channel.id == 0) { - var roll = seq.genNext(seqName) - while (!get(roll).isEmpty) roll = seq.genNext(seqName) - roll - } else channel.id - - val realChannel = channel.copy(id = id) - if (update(realChannel)) Some(id) else None - } - - def get(id: Int): Option[Channel] = { - try { - val response = client.prepareGet( - index, - estype, - id.toString).get() - Some(read[Channel](response.getSourceAsString)) - } catch { - case e: ElasticsearchException => - error(e.getMessage) - None - case e: NullPointerException => None - } - } - - def getByAppid(appid: Int): Seq[Channel] = { - try { - val builder = client.prepareSearch(index).setTypes(estype). - setPostFilter(termFilter("appid", appid)) - ESUtils.getAll[Channel](client, builder) - } catch { - case e: ElasticsearchException => - error(e.getMessage) - Seq[Channel]() - } - } - - def update(channel: Channel): Boolean = { - try { - val response = client.prepareIndex(index, estype, channel.id.toString). - setSource(write(channel)).get() - true - } catch { - case e: ElasticsearchException => - error(e.getMessage) - false - } - } - - def delete(id: Int): Unit = { - try { - client.prepareDelete(index, estype, id.toString).get - } catch { - case e: ElasticsearchException => - error(e.getMessage) - } - } - -} diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala deleted file mode 100644 index 2d6056bfba..0000000000 --- a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEngineInstances.scala +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package org.apache.predictionio.data.storage.elasticsearch - -import grizzled.slf4j.Logging -import org.apache.predictionio.data.storage.EngineInstance -import org.apache.predictionio.data.storage.EngineInstanceSerializer -import org.apache.predictionio.data.storage.EngineInstances -import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.ElasticsearchException -import org.elasticsearch.client.Client -import org.elasticsearch.index.query.FilterBuilders._ -import org.elasticsearch.search.sort.SortOrder -import org.json4s.JsonDSL._ -import org.json4s._ -import org.json4s.native.JsonMethods._ -import org.json4s.native.Serialization.read -import org.json4s.native.Serialization.write - -class ESEngineInstances(client: Client, config: StorageClientConfig, index: String) - extends EngineInstances with Logging { - implicit val formats = DefaultFormats + new EngineInstanceSerializer - private val estype = "engine_instances" - - val indices = client.admin.indices - val indexExistResponse = indices.prepareExists(index).get - if (!indexExistResponse.isExists) { - indices.prepareCreate(index).get - } - val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get - if (!typeExistResponse.isExists) { - val json = - (estype -> - ("properties" -> - ("status" -> ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("startTime" -> ("type" -> "date")) ~ - ("endTime" -> ("type" -> "date")) ~ - ("engineId" -> ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("engineVersion" -> - ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("engineVariant" -> - ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("engineFactory" -> - ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("batch" -> - ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("dataSourceParams" -> - ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("preparatorParams" -> - ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("algorithmsParams" -> - ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("servingParams" -> - ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("status" -> ("type" -> "string") ~ ("index" -> "not_analyzed")))) - indices.preparePutMapping(index).setType(estype). - setSource(compact(render(json))).get - } - - def insert(i: EngineInstance): String = { - try { - val response = client.prepareIndex(index, estype). 
- setSource(write(i)).get - response.getId - } catch { - case e: ElasticsearchException => - error(e.getMessage) - "" - } - } - - def get(id: String): Option[EngineInstance] = { - try { - val response = client.prepareGet(index, estype, id).get - if (response.isExists) { - Some(read[EngineInstance](response.getSourceAsString)) - } else { - None - } - } catch { - case e: ElasticsearchException => - error(e.getMessage) - None - } - } - - def getAll(): Seq[EngineInstance] = { - try { - val builder = client.prepareSearch(index).setTypes(estype) - ESUtils.getAll[EngineInstance](client, builder) - } catch { - case e: ElasticsearchException => - error(e.getMessage) - Nil - } - } - - def getCompleted( - engineId: String, - engineVersion: String, - engineVariant: String): Seq[EngineInstance] = { - try { - val builder = client.prepareSearch(index).setTypes(estype).setPostFilter( - andFilter( - termFilter("status", "COMPLETED"), - termFilter("engineId", engineId), - termFilter("engineVersion", engineVersion), - termFilter("engineVariant", engineVariant))). - addSort("startTime", SortOrder.DESC) - ESUtils.getAll[EngineInstance](client, builder) - } catch { - case e: ElasticsearchException => - error(e.getMessage) - Nil - } - } - - def getLatestCompleted( - engineId: String, - engineVersion: String, - engineVariant: String): Option[EngineInstance] = - getCompleted( - engineId, - engineVersion, - engineVariant).headOption - - def update(i: EngineInstance): Unit = { - try { - client.prepareUpdate(index, estype, i.id).setDoc(write(i)).get - } catch { - case e: ElasticsearchException => error(e.getMessage) - } - } - - def delete(id: String): Unit = { - try { - client.prepareDelete(index, estype, id).get - } catch { - case e: ElasticsearchException => error(e.getMessage) - } - } -} diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala deleted file mode 100644 index 68c5a7417f..0000000000 --- a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESEvaluationInstances.scala +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package org.apache.predictionio.data.storage.elasticsearch - -import grizzled.slf4j.Logging -import org.apache.predictionio.data.storage.EvaluationInstance -import org.apache.predictionio.data.storage.EvaluationInstanceSerializer -import org.apache.predictionio.data.storage.EvaluationInstances -import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.ElasticsearchException -import org.elasticsearch.client.Client -import org.elasticsearch.index.query.FilterBuilders._ -import org.elasticsearch.search.sort.SortOrder -import org.json4s.JsonDSL._ -import org.json4s._ -import org.json4s.native.JsonMethods._ -import org.json4s.native.Serialization.read -import org.json4s.native.Serialization.write - -class ESEvaluationInstances(client: Client, config: StorageClientConfig, index: String) - extends EvaluationInstances with Logging { - implicit val formats = DefaultFormats + new EvaluationInstanceSerializer - private val estype = "evaluation_instances" - - val indices = client.admin.indices - val indexExistResponse = indices.prepareExists(index).get - if (!indexExistResponse.isExists) { - indices.prepareCreate(index).get - } - val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get - if (!typeExistResponse.isExists) { - val json = - (estype -> - ("properties" -> - ("status" -> ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("startTime" -> ("type" -> "date")) ~ - ("endTime" -> ("type" -> "date")) ~ - ("evaluationClass" -> - ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("engineParamsGeneratorClass" -> - ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("batch" -> - ("type" -> "string") ~ ("index" -> "not_analyzed")) ~ - ("evaluatorResults" -> - ("type" -> "string") ~ ("index" -> "no")) ~ - ("evaluatorResultsHTML" -> - ("type" -> "string") ~ ("index" -> "no")) ~ - ("evaluatorResultsJSON" -> - ("type" -> "string") ~ ("index" -> "no")))) - indices.preparePutMapping(index).setType(estype). - setSource(compact(render(json))).get - } - - def insert(i: EvaluationInstance): String = { - try { - val response = client.prepareIndex(index, estype). - setSource(write(i)).get - response.getId - } catch { - case e: ElasticsearchException => - error(e.getMessage) - "" - } - } - - def get(id: String): Option[EvaluationInstance] = { - try { - val response = client.prepareGet(index, estype, id).get - if (response.isExists) { - Some(read[EvaluationInstance](response.getSourceAsString)) - } else { - None - } - } catch { - case e: ElasticsearchException => - error(e.getMessage) - None - } - } - - def getAll(): Seq[EvaluationInstance] = { - try { - val builder = client.prepareSearch(index).setTypes(estype) - ESUtils.getAll[EvaluationInstance](client, builder) - } catch { - case e: ElasticsearchException => - error(e.getMessage) - Nil - } - } - - def getCompleted(): Seq[EvaluationInstance] = { - try { - val builder = client.prepareSearch(index).setTypes(estype).setPostFilter( - termFilter("status", "EVALCOMPLETED")). 
- addSort("startTime", SortOrder.DESC) - ESUtils.getAll[EvaluationInstance](client, builder) - } catch { - case e: ElasticsearchException => - error(e.getMessage) - Nil - } - } - - def update(i: EvaluationInstance): Unit = { - try { - client.prepareUpdate(index, estype, i.id).setDoc(write(i)).get - } catch { - case e: ElasticsearchException => error(e.getMessage) - } - } - - def delete(id: String): Unit = { - try { - client.prepareDelete(index, estype, id).get - } catch { - case e: ElasticsearchException => error(e.getMessage) - } - } -} diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala deleted file mode 100644 index 5c9e170e66..0000000000 --- a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESSequences.scala +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package org.apache.predictionio.data.storage.elasticsearch - -import grizzled.slf4j.Logging -import org.apache.predictionio.data.storage.StorageClientConfig -import org.elasticsearch.ElasticsearchException -import org.elasticsearch.client.Client -import org.json4s.JsonDSL._ -import org.json4s._ -import org.json4s.native.JsonMethods._ - -class ESSequences(client: Client, config: StorageClientConfig, index: String) extends Logging { - implicit val formats = DefaultFormats - private val estype = "sequences" - - val indices = client.admin.indices - val indexExistResponse = indices.prepareExists(index).get - if (!indexExistResponse.isExists) { - // val settingsJson = - // ("number_of_shards" -> 1) ~ - // ("auto_expand_replicas" -> "0-all") - indices.prepareCreate(index).get - } - val typeExistResponse = indices.prepareTypesExists(index).setTypes(estype).get - if (!typeExistResponse.isExists) { - val mappingJson = - (estype -> - ("_source" -> ("enabled" -> 0)) ~ - ("_all" -> ("enabled" -> 0)) ~ - ("_type" -> ("index" -> "no")) ~ - ("enabled" -> 0)) - indices.preparePutMapping(index).setType(estype). - setSource(compact(render(mappingJson))).get - } - - def genNext(name: String): Int = { - try { - val response = client.prepareIndex(index, estype, name). 
- setSource(compact(render("n" -> name))).get - response.getVersion().toInt - } catch { - case e: ElasticsearchException => - error(e.getMessage) - 0 - } - } -} diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala deleted file mode 100644 index f5c99bfcbc..0000000000 --- a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/ESUtils.scala +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package org.apache.predictionio.data.storage.elasticsearch - -import org.elasticsearch.action.search.SearchRequestBuilder -import org.elasticsearch.client.Client -import org.elasticsearch.common.unit.TimeValue -import org.json4s.Formats -import org.json4s.native.Serialization.read - -import scala.collection.mutable.ArrayBuffer - -object ESUtils { - val scrollLife = new TimeValue(60000) - - def getAll[T : Manifest]( - client: Client, - builder: SearchRequestBuilder)( - implicit formats: Formats): Seq[T] = { - val results = ArrayBuffer[T]() - var response = builder.setScroll(scrollLife).get - var hits = response.getHits().hits() - results ++= hits.map(h => read[T](h.getSourceAsString)) - while (hits.size > 0) { - response = client.prepareSearchScroll(response.getScrollId). - setScroll(scrollLife).get - hits = response.getHits().hits() - results ++= hits.map(h => read[T](h.getSourceAsString)) - } - results - } -} diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala deleted file mode 100644 index 75ac2b0229..0000000000 --- a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/StorageClient.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package org.apache.predictionio.data.storage.elasticsearch - -import grizzled.slf4j.Logging -import org.apache.predictionio.data.storage.BaseStorageClient -import org.apache.predictionio.data.storage.StorageClientConfig -import org.apache.predictionio.data.storage.StorageClientException -import org.elasticsearch.client.transport.TransportClient -import org.elasticsearch.common.settings.ImmutableSettings -import org.elasticsearch.common.transport.InetSocketTransportAddress -import org.elasticsearch.transport.ConnectTransportException - -class StorageClient(val config: StorageClientConfig) extends BaseStorageClient - with Logging { - override val prefix = "ES" - val client = try { - val hosts = config.properties.get("HOSTS"). - map(_.split(",").toSeq).getOrElse(Seq("localhost")) - val ports = config.properties.get("PORTS"). - map(_.split(",").toSeq.map(_.toInt)).getOrElse(Seq(9300)) - val settings = ImmutableSettings.settingsBuilder() - .put("cluster.name", config.properties.getOrElse("CLUSTERNAME", "elasticsearch")) - val transportClient = new TransportClient(settings) - (hosts zip ports) foreach { hp => - transportClient.addTransportAddress( - new InetSocketTransportAddress(hp._1, hp._2)) - } - transportClient - } catch { - case e: ConnectTransportException => - throw new StorageClientException(e.getMessage, e) - } -} diff --git a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala b/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala deleted file mode 100644 index 0c549b8cf1..0000000000 --- a/storage/elasticsearch1/src/main/scala/org/apache/predictionio/data/storage/elasticsearch/package.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package org.apache.predictionio.data.storage - -/** Elasticsearch implementation of storage traits, supporting meta data only - * - * @group Implementation - */ -package object elasticsearch {} diff --git a/storage/elasticsearch1/src/test/resources/application.conf b/storage/elasticsearch1/src/test/resources/application.conf deleted file mode 100644 index eecae44b07..0000000000 --- a/storage/elasticsearch1/src/test/resources/application.conf +++ /dev/null @@ -1,28 +0,0 @@ -org.apache.predictionio.data.storage { - sources { - mongodb { - type = mongodb - hosts = [localhost] - ports = [27017] - } - elasticsearch { - type = elasticsearch - hosts = [localhost] - ports = [9300] - } - } - repositories { - # This section is dummy just to make storage happy. - # The actual testing will not bypass these repository settings completely. 
- # Please refer to StorageTestUtils.scala. - settings { - name = "test_predictionio" - source = mongodb - } - - appdata { - name = "test_predictionio_appdata" - source = mongodb - } - } -} diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala deleted file mode 100644 index 795cf7e290..0000000000 --- a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/HB_0_8_0.scala +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package org.apache.predictionio.data.storage.hbase.upgrade - -import org.apache.predictionio.annotation.Experimental - -import org.apache.predictionio.data.storage.Event -import org.apache.predictionio.data.storage.EventValidation -import org.apache.predictionio.data.storage.DataMap - -import org.apache.hadoop.hbase.client.Scan -import org.apache.hadoop.hbase.client.HConnection -import org.apache.hadoop.hbase.client.Result -import org.apache.hadoop.hbase.TableName -import org.apache.hadoop.hbase.util.Bytes - -import org.joda.time.DateTime -import org.joda.time.DateTimeZone - -import org.json4s.DefaultFormats -import org.json4s.JObject -import org.json4s.native.Serialization.{ read, write } - -import org.apache.commons.codec.binary.Base64 - -import scala.collection.JavaConversions._ - -/** :: Experimental :: */ -@Experimental -object HB_0_8_0 { - - implicit val formats = DefaultFormats - - def getByAppId( - connection: HConnection, - namespace: String, - appId: Int): Iterator[Event] = { - val tableName = TableName.valueOf(namespace, "events") - val table = connection.getTable(tableName) - val start = PartialRowKey(appId) - val stop = PartialRowKey(appId + 1) - val scan = new Scan(start.toBytes, stop.toBytes) - val scanner = table.getScanner(scan) - table.close() - scanner.iterator().map { resultToEvent(_) } - } - - val colNames: Map[String, Array[Byte]] = Map( - "event" -> "e", - "entityType" -> "ety", - "entityId" -> "eid", - "targetEntityType" -> "tety", - "targetEntityId" -> "teid", - "properties" -> "p", - "prId" -> "pk", // columna name is 'pk' in 0.8.0/0.8.1 - "eventTimeZone" -> "etz", - "creationTimeZone" -> "ctz" - ).mapValues(Bytes.toBytes(_)) - - - class RowKey( - val appId: Int, - val millis: Long, - val uuidLow: Long - ) { - lazy val toBytes: Array[Byte] = { - // add UUID least significant bits for multiple actions at the same time - // (UUID's most significant bits are actually timestamp, - // use eventTime instead). 
- Bytes.toBytes(appId) ++ Bytes.toBytes(millis) ++ Bytes.toBytes(uuidLow) - } - override def toString: String = { - Base64.encodeBase64URLSafeString(toBytes) - } - } - - object RowKey { - // get RowKey from string representation - def apply(s: String): RowKey = { - try { - apply(Base64.decodeBase64(s)) - } catch { - case e: Exception => throw new RowKeyException( - s"Failed to convert String ${s} to RowKey because ${e}", e) - } - } - - def apply(b: Array[Byte]): RowKey = { - if (b.size != 20) { - val bString = b.mkString(",") - throw new RowKeyException( - s"Incorrect byte array size. Bytes: ${bString}.") - } - - new RowKey( - appId = Bytes.toInt(b.slice(0, 4)), - millis = Bytes.toLong(b.slice(4, 12)), - uuidLow = Bytes.toLong(b.slice(12, 20)) - ) - } - } - - class RowKeyException(msg: String, cause: Exception) - extends Exception(msg, cause) { - def this(msg: String) = this(msg, null) - } - - case class PartialRowKey(val appId: Int, val millis: Option[Long] = None) { - val toBytes: Array[Byte] = { - Bytes.toBytes(appId) ++ - (millis.map(Bytes.toBytes(_)).getOrElse(Array[Byte]())) - } - } - - def resultToEvent(result: Result): Event = { - val rowKey = RowKey(result.getRow()) - - val eBytes = Bytes.toBytes("e") - // val e = result.getFamilyMap(eBytes) - - def getStringCol(col: String): String = { - val r = result.getValue(eBytes, colNames(col)) - require(r != null, - s"Failed to get value for column ${col}. " + - s"Rowkey: ${rowKey.toString} " + - s"StringBinary: ${Bytes.toStringBinary(result.getRow())}.") - - Bytes.toString(r) - } - - def getOptStringCol(col: String): Option[String] = { - val r = result.getValue(eBytes, colNames(col)) - if (r == null) { - None - } else { - Some(Bytes.toString(r)) - } - } - - def getTimestamp(col: String): Long = { - result.getColumnLatestCell(eBytes, colNames(col)).getTimestamp() - } - - val event = getStringCol("event") - val entityType = getStringCol("entityType") - val entityId = getStringCol("entityId") - val targetEntityType = getOptStringCol("targetEntityType") - val targetEntityId = getOptStringCol("targetEntityId") - val properties: DataMap = getOptStringCol("properties") - .map(s => DataMap(read[JObject](s))).getOrElse(DataMap()) - val prId = getOptStringCol("prId") - val eventTimeZone = getOptStringCol("eventTimeZone") - .map(DateTimeZone.forID(_)) - .getOrElse(EventValidation.defaultTimeZone) - val creationTimeZone = getOptStringCol("creationTimeZone") - .map(DateTimeZone.forID(_)) - .getOrElse(EventValidation.defaultTimeZone) - - val creationTime: DateTime = new DateTime( - getTimestamp("event"), creationTimeZone - ) - - Event( - eventId = Some(RowKey(result.getRow()).toString), - event = event, - entityType = entityType, - entityId = entityId, - targetEntityType = targetEntityType, - targetEntityId = targetEntityId, - properties = properties, - eventTime = new DateTime(rowKey.millis, eventTimeZone), - tags = Nil, - prId = prId, - creationTime = creationTime - ) - } -} diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala deleted file mode 100644 index 1759561207..0000000000 --- a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade.scala +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package org.apache.predictionio.data.storage.hbase.upgrade - -import org.apache.predictionio.annotation.Experimental - -import org.apache.predictionio.data.storage.Storage -import org.apache.predictionio.data.storage.hbase.HBLEvents -import org.apache.predictionio.data.storage.hbase.HBEventsUtil - -import scala.collection.JavaConversions._ - -/** :: Experimental :: */ -@Experimental -object Upgrade { - - def main(args: Array[String]) { - val fromAppId = args(0).toInt - val toAppId = args(1).toInt - val batchSize = args.lift(2).map(_.toInt).getOrElse(100) - val fromNamespace = args.lift(3).getOrElse("predictionio_eventdata") - - upgrade(fromAppId, toAppId, batchSize, fromNamespace) - } - - /* For upgrade from 0.8.0 or 0.8.1 to 0.8.2 only */ - def upgrade( - fromAppId: Int, - toAppId: Int, - batchSize: Int, - fromNamespace: String) { - - val events = Storage.getLEvents().asInstanceOf[HBLEvents] - - // Assume already run "pio app new " (new app already created) - // TODO: check if new table empty and warn user if not - val newTable = events.getTable(toAppId) - - val newTableName = newTable.getName().getNameAsString() - println(s"Copying data from ${fromNamespace}:events for app ID ${fromAppId}" - + s" to new HBase table ${newTableName}...") - - HB_0_8_0.getByAppId( - events.client.connection, - fromNamespace, - fromAppId).grouped(batchSize).foreach { eventGroup => - val puts = eventGroup.map{ e => - val (put, rowkey) = HBEventsUtil.eventToPut(e, toAppId) - put - } - newTable.put(puts.toList) - } - - newTable.flushCommits() - newTable.close() - println("Done.") - } - -} diff --git a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala b/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala deleted file mode 100644 index de74d46dce..0000000000 --- a/storage/hbase/src/main/scala/org/apache/predictionio/data/storage/hbase/upgrade/Upgrade_0_8_3.scala +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package org.apache.predictionio.data.storage.hbase.upgrade - -import org.apache.predictionio.annotation.Experimental - -import grizzled.slf4j.Logger -import org.apache.predictionio.data.storage.Storage -import org.apache.predictionio.data.storage.DataMap -import org.apache.predictionio.data.storage.hbase.HBLEvents -import org.apache.predictionio.data.storage.hbase.HBEventsUtil - -import scala.collection.JavaConversions._ - -import scala.concurrent._ -import ExecutionContext.Implicits.global -import org.apache.predictionio.data.storage.LEvents -import scala.concurrent.Await -import scala.concurrent.duration.Duration -import java.lang.Thread - -object CheckDistribution { - def entityType(eventClient: LEvents, appId: Int) - : Map[(String, Option[String]), Int] = { - eventClient - .find(appId = appId) - .foldLeft(Map[(String, Option[String]), Int]().withDefaultValue(0)) { - case (m, e) => { - val k = (e.entityType, e.targetEntityType) - m.updated(k, m(k) + 1) - } - } - } - - def runMain(appId: Int) { - val eventClient = Storage.getLEvents().asInstanceOf[HBLEvents] - - entityType(eventClient, appId) - .toSeq - .sortBy(-_._2) - .foreach { println } - - } - - def main(args: Array[String]) { - runMain(args(0).toInt) - } - -} - -/** :: Experimental :: */ -@Experimental -object Upgrade_0_8_3 { - val NameMap = Map( - "pio_user" -> "user", - "pio_item" -> "item") - val RevNameMap = NameMap.toSeq.map(_.swap).toMap - - val logger = Logger[this.type] - - def main(args: Array[String]) { - val fromAppId = args(0).toInt - val toAppId = args(1).toInt - - runMain(fromAppId, toAppId) - } - - def runMain(fromAppId: Int, toAppId: Int): Unit = { - upgrade(fromAppId, toAppId) - } - - - val obsEntityTypes = Set("pio_user", "pio_item") - val obsProperties = Set( - "pio_itypes", "pio_starttime", "pio_endtime", - "pio_inactive", "pio_price", "pio_rating") - - def hasPIOPrefix(eventClient: LEvents, appId: Int): Boolean = { - eventClient.find(appId = appId).filter( e => - (obsEntityTypes.contains(e.entityType) || - e.targetEntityType.map(obsEntityTypes.contains(_)).getOrElse(false) || - (!e.properties.keySet.forall(!obsProperties.contains(_))) - ) - ).hasNext - } - - def isEmpty(eventClient: LEvents, appId: Int): Boolean = - !eventClient.find(appId = appId).hasNext - - - def upgradeCopy(eventClient: LEvents, fromAppId: Int, toAppId: Int) { - val fromDist = CheckDistribution.entityType(eventClient, fromAppId) - - logger.info("FromAppId Distribution") - fromDist.toSeq.sortBy(-_._2).foreach { e => logger.info(e) } - - val events = eventClient - .find(appId = fromAppId) - .zipWithIndex - .foreach { case (fromEvent, index) => { - if (index % 50000 == 0) { - // logger.info(s"Progress: $fromEvent $index") - logger.info(s"Progress: $index") - } - - - val fromEntityType = fromEvent.entityType - val toEntityType = NameMap.getOrElse(fromEntityType, fromEntityType) - - val fromTargetEntityType = fromEvent.targetEntityType - val toTargetEntityType = fromTargetEntityType - .map { et => NameMap.getOrElse(et, et) } - - val toProperties = DataMap(fromEvent.properties.fields.map { - case (k, v) => - val newK = if (obsProperties.contains(k)) { - val nK = k.stripPrefix("pio_") - logger.info(s"property ${k} will be renamed to ${nK}") - nK - } else k - (newK, v) - }) - - val toEvent = fromEvent.copy( - entityType = toEntityType, - targetEntityType = toTargetEntityType, - properties = toProperties) - - eventClient.insert(toEvent, toAppId) - }} - - - val toDist = CheckDistribution.entityType(eventClient, toAppId) - - 
logger.info("Recap fromAppId Distribution") - fromDist.toSeq.sortBy(-_._2).foreach { e => logger.info(e) } - - logger.info("ToAppId Distribution") - toDist.toSeq.sortBy(-_._2).foreach { e => logger.info(e) } - - val fromGood = fromDist - .toSeq - .forall { case (k, c) => { - val (et, tet) = k - val net = NameMap.getOrElse(et, et) - val ntet = tet.map(tet => NameMap.getOrElse(tet, tet)) - val nk = (net, ntet) - val nc = toDist.getOrElse(nk, -1) - val checkMatch = (c == nc) - if (!checkMatch) { - logger.info(s"${k} doesn't match: old has ${c}. new has ${nc}.") - } - checkMatch - }} - - val toGood = toDist - .toSeq - .forall { case (k, c) => { - val (et, tet) = k - val oet = RevNameMap.getOrElse(et, et) - val otet = tet.map(tet => RevNameMap.getOrElse(tet, tet)) - val ok = (oet, otet) - val oc = fromDist.getOrElse(ok, -1) - val checkMatch = (c == oc) - if (!checkMatch) { - logger.info(s"${k} doesn't match: new has ${c}. old has ${oc}.") - } - checkMatch - }} - - if (!fromGood || !toGood) { - logger.error("Doesn't match!! There is an import error.") - } else { - logger.info("Count matches. Looks like we are good to go.") - } - } - - /* For upgrade from 0.8.2 to 0.8.3 only */ - def upgrade(fromAppId: Int, toAppId: Int) { - - val eventClient = Storage.getLEvents().asInstanceOf[HBLEvents] - - require(fromAppId != toAppId, - s"FromAppId: $fromAppId must be different from toAppId: $toAppId") - - if (hasPIOPrefix(eventClient, fromAppId)) { - require( - isEmpty(eventClient, toAppId), - s"Target appId: $toAppId is not empty. Please run " + - "`pio app data-delete ` to clean the data before upgrading") - - logger.info(s"$fromAppId isEmpty: " + isEmpty(eventClient, fromAppId)) - - upgradeCopy(eventClient, fromAppId, toAppId) - - } else { - logger.info(s"From appId: ${fromAppId} doesn't contain" - + s" obsolete entityTypes ${obsEntityTypes} or" - + s" obsolete properties ${obsProperties}." - + " No need data migration." - + s" You can continue to use appId ${fromAppId}.") - } - - logger.info("Done.") - } - - -} diff --git a/tests/docker-files/env-conf/pio-env.sh b/tests/docker-files/env-conf/pio-env.sh index 9230d954e5..3e39957a97 100644 --- a/tests/docker-files/env-conf/pio-env.sh +++ b/tests/docker-files/env-conf/pio-env.sh @@ -88,16 +88,7 @@ PIO_STORAGE_SOURCES_ELASTICSEARCH_TYPE=elasticsearch #PIO_STORAGE_SOURCES_ELASTICSEARCH_CLUSTERNAME=pio PIO_STORAGE_SOURCES_ELASTICSEARCH_HOSTS=elasticsearch PIO_STORAGE_SOURCES_ELASTICSEARCH_SCHEMES=http -if [ ! -z "$PIO_ELASTICSEARCH_VERSION" ]; then - ES_MAJOR=`echo $PIO_ELASTICSEARCH_VERSION | awk -F. '{print $1}'` -else - ES_MAJOR=1 -fi -if [ "$ES_MAJOR" = "1" ]; then - PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9300 -else - PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200 -fi +PIO_STORAGE_SOURCES_ELASTICSEARCH_PORTS=9200 #PIO_STORAGE_SOURCES_ELASTICSEARCH_HOME=$ELASTICSEARCH_HOME # Local File System Example diff --git a/tests/pio_tests/engines/recommendation-engine/build.sbt b/tests/pio_tests/engines/recommendation-engine/build.sbt index 14454179d5..c76c1f2c79 100644 --- a/tests/pio_tests/engines/recommendation-engine/build.sbt +++ b/tests/pio_tests/engines/recommendation-engine/build.sbt @@ -15,10 +15,6 @@ * limitations under the License. 
*/ -import AssemblyKeys._ - -assemblySettings - scalaVersion in ThisBuild := sys.env.getOrElse("PIO_SCALA_VERSION", "2.11.12") name := "template-scala-parallel-recommendation" @@ -26,6 +22,6 @@ name := "template-scala-parallel-recommendation" organization := "org.apache.predictionio" libraryDependencies ++= Seq( - "org.apache.predictionio" %% "apache-predictionio-core" % "0.14.0" % "provided", - "org.apache.spark" %% "spark-core" % sys.env.getOrElse("PIO_SPARK_VERSION", "2.1.1") % "provided", - "org.apache.spark" %% "spark-mllib" % sys.env.getOrElse("PIO_SPARK_VERSION", "2.1.1") % "provided") + "org.apache.predictionio" %% "apache-predictionio-core" % "0.15.0-SNAPSHOT" % "provided", + "org.apache.spark" %% "spark-core" % sys.env.getOrElse("PIO_SPARK_VERSION", "2.1.3") % "provided", + "org.apache.spark" %% "spark-mllib" % sys.env.getOrElse("PIO_SPARK_VERSION", "2.1.3") % "provided") diff --git a/tests/pio_tests/engines/recommendation-engine/project/assembly.sbt b/tests/pio_tests/engines/recommendation-engine/project/assembly.sbt index 54c32528e9..9c014713d3 100644 --- a/tests/pio_tests/engines/recommendation-engine/project/assembly.sbt +++ b/tests/pio_tests/engines/recommendation-engine/project/assembly.sbt @@ -1 +1 @@ -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.9") diff --git a/tools/src/main/scala/org/apache/predictionio/tools/RunWorkflow.scala b/tools/src/main/scala/org/apache/predictionio/tools/RunWorkflow.scala index 50b93370fd..701a3ee62f 100644 --- a/tools/src/main/scala/org/apache/predictionio/tools/RunWorkflow.scala +++ b/tools/src/main/scala/org/apache/predictionio/tools/RunWorkflow.scala @@ -52,14 +52,14 @@ object RunWorkflow extends Logging { val jarFiles = jarFilesForScala(engineDirPath).map(_.toURI) val args = - (if (wa.mainPyFile.isEmpty) { + { val variantJson = wa.variantJson.getOrElse(new File(engineDirPath, "engine.json")) val ei = Console.getEngineInfo(variantJson, engineDirPath) Seq( "--engine-id", ei.engineId, "--engine-version", ei.engineVersion, "--engine-variant", variantJson.toURI.toString) - } else Nil) ++ + } ++ wa.engineFactory.map( x => Seq("--engine-factory", x)).getOrElse(Nil) ++ wa.engineParamsKey.map( diff --git a/tools/src/main/scala/org/apache/predictionio/tools/console/Pio.scala b/tools/src/main/scala/org/apache/predictionio/tools/console/Pio.scala index 1b4c8a86ec..a3691d009a 100644 --- a/tools/src/main/scala/org/apache/predictionio/tools/console/Pio.scala +++ b/tools/src/main/scala/org/apache/predictionio/tools/console/Pio.scala @@ -24,6 +24,7 @@ import grizzled.slf4j.Logging import scala.concurrent.Await import scala.concurrent.duration.Duration +import scala.io.StdIn import scala.language.implicitConversions import scala.sys.process._ @@ -238,7 +239,7 @@ object Pio extends Logging { info(f" ${ch.name}%16s | ${ch.id}%10s") } - val choice = if(force) "YES" else readLine("Enter 'YES' to proceed: ") + val choice = if(force) "YES" else StdIn.readLine("Enter 'YES' to proceed: ") choice match { case "YES" => AppCmd.delete(name) @@ -278,7 +279,7 @@ object Pio extends Logging { info(s" App ID: ${appDesc.app.id}") info(s" Description: ${appDesc.app.description}") - val choice = if(force) "YES" else readLine("Enter 'YES' to proceed: ") + val choice = if(force) "YES" else StdIn.readLine("Enter 'YES' to proceed: ") choice match { case "YES" => AppCmd.dataDelete(name, channel, all) @@ -307,7 +308,7 @@ object Pio extends Logging { info(s" Channel ID: ${chan.id}") info(s" App Name: 
${appDesc.app.name}") info(s" App ID: ${appDesc.app.id}") - val choice = if(force) "YES" else readLine("Enter 'YES' to proceed: ") + val choice = if(force) "YES" else StdIn.readLine("Enter 'YES' to proceed: ") choice match { case "YES" => AppCmd.channelDelete(appName, deleteChannel)