Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RasterRef should not read HDFS scheme with GDAL reader. #319

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion core/src/main/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ rasterframes {
showable-tiles = true
showable-max-cells = 20
max-truncate-row-element-length = 40
raster-source-cache-timeout = 120 seconds
raster-source-cache-timeout = 1 seconds
vpipkt marked this conversation as resolved.
Show resolved Hide resolved
}

vlm.gdal {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,12 @@ object RasterSource extends LazyLogging {
ExpressionEncoder()
}

def apply(source: String): RasterSource = apply(new URI(source))

def apply(source: URI): RasterSource =
rsCache.get(
source.toASCIIString, _ => source match {
// rsCache.get(
// source.toASCIIString, _ =>
source match {
vpipkt marked this conversation as resolved.
Show resolved Hide resolved
case IsGDAL() => GDALRasterSource(source)
case IsHadoopGeoTiff() =>
// TODO: How can we get the active Hadoop configuration?
Expand All @@ -113,7 +116,7 @@ object RasterSource extends LazyLogging {
case IsDefaultGeoTiff() => JVMGeoTiffRasterSource(source)
case s => throw new UnsupportedOperationException(s"Reading '$s' not supported")
}
)
// )

object IsGDAL {

Expand All @@ -122,6 +125,8 @@ object RasterSource extends LazyLogging {

val gdalOnlyExtensions = Seq(".jp2", ".mrf", ".hdf", ".vrt")

val blacklistedSchemes = Seq("s3a", "s3n", "wasbs")

def gdalOnly(source: URI): Boolean =
if (gdalOnlyExtensions.exists(source.getPath.toLowerCase.endsWith)) {
require(GDALRasterSource.hasGDAL, s"Can only read $source if GDAL is available")
Expand All @@ -130,26 +135,43 @@ object RasterSource extends LazyLogging {

/** Extractor for determining whether a URI should be read with GDAL, based on its scheme, its file extension, and whether GDAL is available and preferred. */
def unapply(source: URI): Boolean = {

lazy val schemeIsNotHadoop = Option(source.getScheme())
.filter(blacklistedSchemes.contains)
.isEmpty

lazy val schemeIsGdal = Option(source.getScheme())
.exists(_.startsWith("gdal"))
.exists(_ == "gdal") && schemeIsNotHadoop

(gdalOnly(source) && schemeIsNotHadoop) ||
(GDALRasterSource.hasGDAL &&
(preferGdal && schemeIsGdal) ||
(preferGdal && schemeIsNotHadoop)
)

gdalOnly(source) || ((preferGdal || schemeIsGdal) && GDALRasterSource.hasGDAL)
}
}

object IsDefaultGeoTiff {
def unapply(source: URI): Boolean = source.getScheme match {
case "file" | "http" | "https" | "s3" => true
case null | "" ⇒ true
case _ => false
import IsGDAL.gdalOnly
def unapply(source: URI): Boolean = {
if (gdalOnly(source)) false
else source.getScheme match {
case "file" | "http" | "https" | "s3" => true
case null | "" ⇒ true
case _ => false
}
}
}

object IsHadoopGeoTiff {
def unapply(source: URI): Boolean = source.getScheme match {
case "hdfs" | "s3n" | "s3a" | "wasb" | "wasbs" => true
case _ => false
}
import IsGDAL.gdalOnly
def unapply(source: URI): Boolean =
if (gdalOnly(source)) false
else source.getScheme match {
case "hdfs" | "s3n" | "s3a" | "wasb" | "wasbs" => true
case _ => false
}
}

trait URIRasterSource { _: RasterSource =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,16 @@ class RasterSourceSpec extends TestEnvironment with TestData {
}

if(GDALRasterSource.hasGDAL) {
it("should choose correct delegate for scheme and file"){
val hdfsSchemeTif = RasterSource("s3n://bucket/prefix/raster.tif")
val easySchemeTif = RasterSource("s3://bucket/prefix/raster.tif") // should interpret as /vsis3/
lazy val hdfsSchemeJp2 = RasterSource("s3n://s22s-test-geotiffs/luray_snp/B04.jp2") // can't read with hadoop reader

hdfsSchemeTif should matchPattern {case HadoopGeoTiffRasterSource(_, _) ⇒}
easySchemeTif should matchPattern {case GDALRasterSource(_) ⇒}
metasim marked this conversation as resolved.
Show resolved Hide resolved
assertThrows[UnsupportedOperationException](hdfsSchemeJp2.bandCount)

}
describe("GDAL Rastersource") {
val gdal = GDALRasterSource(cogPath)
val jvm = JVMGeoTiffRasterSource(cogPath)
Expand Down
3 changes: 0 additions & 3 deletions pyrasterframes/src/main/python/tests/RasterFunctionsTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,9 +300,6 @@ def test_render_composite(self):
# Look for the PNG magic cookie
self.assertEqual(png_bytes[0:8], bytearray([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]))




def test_rf_interpret_cell_type_as(self):
from pyspark.sql import Row
from pyrasterframes.rf_types import Tile
Expand Down