Merge pull request #562 from s22s/feature/pyspark-3.1
Python binding updates for PySpark 3.1
metasim authored Sep 27, 2021
2 parents 10a7fa3 + b9d2344 commit cc02644
Showing 32 changed files with 519 additions and 349 deletions.
3 changes: 3 additions & 0 deletions .circleci/.dockerignore
@@ -0,0 +1,3 @@
*
!requirements-conda.txt
!fix-permissions
66 changes: 19 additions & 47 deletions .circleci/Dockerfile
@@ -1,63 +1,35 @@
FROM circleci/openjdk:8-jdk

ENV MINICONDA_VERSION=4.8.2 \
MINICONDA_MD5=87e77f097f6ebb5127c77662dfc3165e \
CONDA_VERSION=4.8.2 \
CONDA_DIR=/opt/conda \
PYTHON_VERSION=3.7.7
FROM circleci/openjdk:11-jdk
#LABEL org.opencontainers.image.source=https://github.com/locationtech/rasterframes

USER root

ENV PATH=$CONDA_DIR/bin:$PATH

# circleci is 3434
COPY --chown=3434:3434 fix-permissions /tmp

# See: https://docs.conda.io/projects/conda/en/latest/user-guide/install/rpm-debian.html
RUN \
apt-get update && \
apt-get install -yq --no-install-recommends \
sudo \
wget \
bzip2 \
file \
libtinfo5 \
ca-certificates \
gettext-base \
locales && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
curl -s https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list

RUN \
cd /tmp && \
mkdir -p $CONDA_DIR && \
wget --quiet https://repo.continuum.io/miniconda/Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh && \
echo "${MINICONDA_MD5} *Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh" | md5sum -c - && \
/bin/bash Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh -f -b -p $CONDA_DIR && \
rm Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh && \
conda config --system --set auto_update_conda false && \
conda config --system --set show_channel_urls true && \
conda config --system --set channel_priority strict && \
if [ ! $PYTHON_VERSION = 'default' ]; then conda install --yes python=$PYTHON_VERSION; fi && \
conda list python | grep '^python ' | tr -s ' ' | cut -d '.' -f 1,2 | sed 's/$/.*/' >> $CONDA_DIR/conda-meta/pinned && \
conda install --quiet --yes conda && \
conda install --quiet --yes pip && \
pip config set global.progress_bar off && \
echo "$CONDA_DIR/lib" > /etc/ld.so.conf.d/conda.conf && \
conda clean --all --force-pkgs-dirs --yes --quiet && \
sh /tmp/fix-permissions $CONDA_DIR 2> /dev/null
apt-get update && \
apt-get install -yq --no-install-recommends conda && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

COPY requirements-conda.txt /tmp/
ENV CONDA_DIR=/opt/conda
ENV PATH=$CONDA_DIR/bin:$PATH

COPY requirements-conda.txt fix-permissions /tmp
RUN \
conda install --channel conda-forge --no-channel-priority --freeze-installed \
--file /tmp/requirements-conda.txt && \
conda install --quiet --yes --channel=conda-forge --file=/tmp/requirements-conda.txt && \
echo "$CONDA_DIR/lib" > /etc/ld.so.conf.d/conda.conf && \
ldconfig && \
conda clean --all --force-pkgs-dirs --yes --quiet && \
sh /tmp/fix-permissions $CONDA_DIR 2> /dev/null && \
ldconfig 2> /dev/null
sh /tmp/fix-permissions $CONDA_DIR


# Work-around for pyproj issue https://github.com/pyproj4/pyproj/issues/415
ENV PROJ_LIB=/opt/conda/share/proj

USER 3434

WORKDIR /home/circleci
30 changes: 19 additions & 11 deletions .circleci/Makefile
@@ -1,19 +1,27 @@
IMAGE_NAME=miniconda-gdal
VERSION=latest
HOST=docker.pkg.github.com
REPO=${HOST}/locationtech/rasterframes
FULL_NAME=${REPO}/${IMAGE_NAME}:${VERSION}
IMAGE_NAME=circleci-openjdk-conda-gdal
SHA=$(shell git log -n1 --format=format:"%H" | cut -c 1-7)
VERSION?=$(SHA)
HOST=docker.io
REPO=$(HOST)/s22s
FULL_NAME=$(REPO)/$(IMAGE_NAME):$(VERSION)

all: build login push
.DEFAULT_GOAL := help
help:
# http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html
@echo "Usage: make [target]"
@echo "Targets: "
@grep -E '^[a-zA-Z0-9_%/-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\t\033[36m%-20s\033[0m %s\n", $$1, $$2}'

build:
all: build push ## Build and then push image

build: ## Build the docker image
docker build . -t ${FULL_NAME}

login:
docker login ${HOST}
login: ## Login to the docker registry
docker login

push:
push: login ## Push docker image to registry
docker push ${FULL_NAME}

shell: build
run: build ## Build image and launch shell
docker run --rm -it ${FULL_NAME} bash
22 changes: 10 additions & 12 deletions .circleci/config.yml
@@ -6,11 +6,10 @@ orbs:
executors:
default:
docker:
- image: s22s/miniconda-gdal:latest
- image: s22s/circleci-openjdk-conda-gdal:b8e30ee
working_directory: ~/repo
environment:
SBT_VERSION: 1.3.8
SBT_OPTS: -Xmx768m
SBT_OPTS: "-Xms64m -Xmx1536m -Djava.awt.headless=true -Dsun.io.serialization.extendedDebugInfo=true"
commands:
setup:
description: Setup for sbt build
@@ -24,8 +23,7 @@ orbs:
steps:
- run:
name: "Compile Scala via sbt"
command: |-
sbt -v -batch compile test:compile it:compile
command: sbt -v -batch compile test:compile it:compile

python:
commands:
@@ -60,6 +58,7 @@ orbs:
mkdir -p /tmp/core_dumps
ls -lh /tmp
cp core.* *.hs /tmp/core_dumps 2> /dev/null || true
cp core/* /tmp/core_dumps/ 2> /dev/null || true
cp -r /tmp/hsperfdata* /tmp/*.hprof /tmp/core_dumps 2> /dev/null || true
when: on_fail

@@ -125,24 +124,23 @@ jobs:

- run:
name: "Scala Tests: core"
command: sbt -batch core/test
command: sbt -v -batch core/test

- run:
name: "Scala Tests: datasource"
command: sbt -batch datasource/test
command: sbt -v -batch datasource/test

- run:
name: "Scala Tests: experimental"
command: sbt -batch experimental/test
command: sbt -v -batch experimental/test

- run:
name: "Create PyRasterFrames package"
command: |-
sbt -v -batch pyrasterframes/package
command: sbt -v -batch pyrasterframes/package

- run:
name: "Python Tests"
command: sbt -batch pyrasterframes/test
command: sbt -v -batch pyrasterframes/test

- rasterframes/save-artifacts
- rasterframes/save-cache
@@ -249,4 +247,4 @@ workflows:
- test
- it
- it-no-gdal
- docs
- docs
15 changes: 0 additions & 15 deletions .circleci/fix-permissions
@@ -1,19 +1,4 @@
#!/usr/bin/env bash
# set permissions on a directory
# after any installation, if a directory needs to be (human) user-writable,
# run this script on it.
# It will make everything in the directory owned by the group $NB_GID
# and writable by that group.
# Deployments that want to set a specific user id can preserve permissions
# by adding the `--group-add users` line to `docker run`.

# uses find to avoid touching files that already have the right permissions,
# which would cause massive image explosion

# right permissions are:
# group=$NB_GID
# AND permissions include group rwX (directory-execute)
# AND directories have setuid,setgid bits set

set -e

4 changes: 3 additions & 1 deletion .circleci/requirements-conda.txt
@@ -1,3 +1,5 @@
gdal==2.4.4
python==3.8
gdal==3.1.2
libspatialindex
rasterio[s3]
rtree
1 change: 0 additions & 1 deletion .sbtopts

This file was deleted.

2 changes: 1 addition & 1 deletion .scalafmt.conf
@@ -1,4 +1,4 @@
version = 3.0.3
version = 3.0.4
runner.dialect = scala212
indent.main = 2
indent.significant = 2
11 changes: 9 additions & 2 deletions build.sbt
@@ -19,6 +19,9 @@
*
*/

// Leave me and my custom keys alone!
Global / lintUnusedKeysOnLoad := false

addCommandAlias("makeSite", "docs/makeSite")
addCommandAlias("previewSite", "docs/previewSite")
addCommandAlias("ghpagesPushSite", "docs/ghpagesPushSite")
@@ -52,6 +55,10 @@ lazy val core = project
libraryDependencies ++= Seq(
`slf4j-api`,
shapeless,
circe("core").value,
circe("generic").value,
circe("parser").value,
circe("generic-extras").value,
frameless excludeAll ExclusionRule("com.github.mpilquist", "simulacrum"),
`jts-core`,
`spray-json`,
@@ -152,14 +159,14 @@ lazy val docs = project
.dependsOn(core, datasource, pyrasterframes)
.enablePlugins(SiteScaladocPlugin, ParadoxPlugin, ParadoxMaterialThemePlugin, GhpagesPlugin, ScalaUnidocPlugin)
.settings(
apiURL := Some(url("http://rasterframes.io/latest/api")),
apiURL := Some(url("https://rasterframes.io/latest/api")),
autoAPIMappings := true,
ghpagesNoJekyll := true,
ScalaUnidoc / siteSubdirName := "latest/api",
paradox / siteSubdirName := ".",
paradoxProperties ++= Map(
"version" -> version.value,
"scaladoc.org.apache.spark.sql.rf" -> "http://rasterframes.io/latest",
"scaladoc.org.apache.spark.sql.rf" -> "https://rasterframes.io/latest",
"github.base_url" -> ""
),
paradoxNavigationExpandDepth := Some(3),
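The core module now pulls in circe (core, generic, parser, generic-extras) above. As a minimal illustration only — not code from this PR, and the `CellStats` type is hypothetical — circe's generic derivation lets a case class round-trip through JSON:

import io.circe.generic.auto._
import io.circe.parser.decode
import io.circe.syntax._

// Hypothetical example type, used only to show derived codecs.
final case class CellStats(min: Double, max: Double, mean: Double)

object CirceSketch extends App {
  val stats = CellStats(0.0, 255.0, 101.5)
  val json = stats.asJson.noSpaces   // Encoder derived by circe-generic
  println(json)                      // {"min":0.0,"max":255.0,"mean":101.5}
  println(decode[CellStats](json))   // Right(CellStats(0.0,255.0,101.5))
}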
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala
@@ -45,7 +45,7 @@ class TileUDT extends UserDefinedType[Tile] {
def userClass: Class[Tile] = classOf[Tile]

def sqlType: StructType = StructType(Seq(
StructField("cell_type", StringType, false),
StructField("cellType", StringType, false),
StructField("cols", IntegerType, false),
StructField("rows", IntegerType, false),
StructField("cells", BinaryType, true),
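The Catalyst field for the cell type is renamed from `cell_type` to `cellType` above, presumably so the Scala UDT matches what the updated Python bindings expect when (de)serializing Tile rows. A minimal sketch for inspecting the struct from Scala, assuming `TileUDT` remains publicly constructible:

import org.apache.spark.sql.rf.TileUDT

object InspectTileSchema extends App {
  // Prints the Catalyst struct that Tile columns are serialized to.
  new TileUDT().sqlType.printTreeString()
  // root
  //  |-- cellType: string (nullable = false)
  //  |-- cols: integer (nullable = false)
  //  |-- rows: integer (nullable = false)
  //  |-- cells: binary (nullable = true)
  //  ... (remaining fields elided)
}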
@@ -82,7 +82,6 @@ package object expressions {
registry.registerExpression[GetCRS]("rf_crs")
registry.registerExpression[RealizeTile]("rf_tile")
registry.registerExpression[CreateProjectedRaster]("rf_proj_raster")
registry.registerExpression[Subtract]("rf_local_subtract")
registry.registerExpression[Multiply]("rf_local_multiply")
registry.registerExpression[Divide]("rf_local_divide")
registry.registerExpression[NormalizedDifference]("rf_normalized_difference")
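The registrations above expose expressions under SQL names such as `rf_local_multiply` and `rf_local_divide` once the registry has been applied to a session. A usage sketch, assuming `rf_make_constant_tile` is registered as in other RasterFrames releases (illustrative only, not part of this diff):

import org.apache.spark.sql.SparkSession
import org.locationtech.rasterframes._

object SqlRegistrySketch extends App {
  val spark = SparkSession.builder()
    .master("local[*]")
    .appName("rf-sql-sketch")
    .getOrCreate()
    .withRasterFrames // initializes RasterFrames, including the SQL registry

  // Multiply a constant 8x8 tile by a scalar via the registered SQL name.
  spark.sql(
    "SELECT rf_local_multiply(rf_make_constant_tile(3, 8, 8, 'int32'), 2) AS doubled"
  ).show(false)

  spark.stop()
}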
@@ -99,38 +99,38 @@ package object functions {
private[rasterframes] val tileOnes: (Int, Int, String) => Tile = (cols, rows, cellTypeName) =>
makeConstantTile(1, cols, rows, cellTypeName)

val reproject_and_merge_f: (Row, CRS, Seq[Tile], Seq[Row], Seq[CRS], Row, String) => Tile = (leftExtentEnc: Row, leftCRSEnc: CRS, tiles: Seq[Tile], rightExtentEnc: Seq[Row], rightCRSEnc: Seq[CRS], leftDimsEnc: Row, resampleMethod: String) => {
if (tiles.isEmpty) null
val reproject_and_merge_f: (Row, CRS, Seq[Tile], Seq[Row], Seq[CRS], Row, String) => Option[Tile] = (leftExtentEnc: Row, leftCRS: CRS, tiles: Seq[Tile], rightExtentEnc: Seq[Row], rightCRSs: Seq[CRS], leftDimsEnc: Row, resampleMethod: String) => {
if (tiles.isEmpty) None
else {
require(tiles.length == rightExtentEnc.length && tiles.length == rightCRSEnc.length, "size mismatch")
require(tiles.length == rightExtentEnc.length && tiles.length == rightCRSs.length, "size mismatch")

val leftExtent: Extent = leftExtentEnc.as[Extent]
val leftDims: Dimensions[Int] = leftDimsEnc.as[Dimensions[Int]]
val leftCRS: CRS = leftCRSEnc
lazy val rightExtents: Seq[Extent] = rightExtentEnc.map(_.as[Extent])
lazy val rightCRSs: Seq[CRS] = rightCRSEnc
val leftExtent = Option(leftExtentEnc).map(_.as[Extent])
val leftDims = Option(leftDimsEnc).map(_.as[Dimensions[Int]])
lazy val rightExtents = rightExtentEnc.map(_.as[Extent])
lazy val resample = resampleMethod match {
case ResampleMethod(mm) => mm
case _ => throw new IllegalArgumentException(s"Unable to parse ResampleMethod for ${resampleMethod}.")
}

if (leftExtent == null || leftDims == null || leftCRS == null) null
else {

val cellType = tiles.map(_.cellType).reduceOption(_ union _).getOrElse(tiles.head.cellType)

// TODO: how to allow control over... expression?
val projOpts = Reproject.Options(resample)
val dest: Tile = ArrayTile.empty(cellType, leftDims.cols, leftDims.rows)
//is there a GT function to do all this?
tiles.zip(rightExtents).zip(rightCRSs).map {
case ((tile, extent), crs) =>
tile.reproject(extent, crs, leftCRS, projOpts)
}.foldLeft(dest)((d, t) =>
d.merge(leftExtent, t.extent, t.tile, projOpts.method)
)
}
}
(leftExtent, leftDims, Option(leftCRS))
.zipped
.map((leftExtent, leftDims, leftCRS) => {
val cellType = tiles
.map(_.cellType)
.reduceOption(_ union _)
.getOrElse(tiles.head.cellType)

// TODO: how to allow control over... expression?
val projOpts = Reproject.Options(resample)
val dest: Tile = ArrayTile.empty(cellType, leftDims.cols, leftDims.rows)
//is there a GT function to do all this?
tiles.zip(rightExtents).zip(rightCRSs).map {
case ((tile, extent), crs) =>
tile.reproject(extent, crs, leftCRS, projOpts)
}.foldLeft(dest)((d, t) =>
d.merge(leftExtent, t.extent, t.tile, projOpts.method)
)
})
}.headOption
}

// NB: Don't be tempted to make this a `val`. Spark will barf if `withRasterFrames` hasn't been called first.
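For reference, the rewritten `reproject_and_merge_f` above replaces null checks with Scala 2.12's `Tuple3#zipped` over `Option`s: the merge body runs only when extent, dimensions, and CRS are all present, and `.headOption` collapses the result to an `Option[Tile]`. A minimal, self-contained sketch of that pattern (the `combine` helper is hypothetical, not project code):

object ZippedOptionSketch extends App {
  // The function body runs only when all three Options are defined; otherwise None.
  def combine(a: Option[Int], b: Option[Int], c: Option[Int]): Option[Int] =
    (a, b, c).zipped.map(_ + _ + _).headOption

  println(combine(Some(1), Some(2), Some(3))) // Some(6)
  println(combine(Some(1), None, Some(3)))    // None
}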
@@ -1,3 +1,23 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
* Copyright 2021 Azavea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* [http://www.apache.org/licenses/LICENSE-2.0]
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*
* SPDX-License-Identifier: Apache-2.0
*
*/
package org.locationtech.rasterframes.ref

import geotrellis.raster.GridBounds