This code provides a starting point for the Collections API optimizations, and is basically an optimized version of def histograms(nlcd: TileLayerRDD[SpatialKey], soil: TileLayerRDD[SpatialKey], multiPolygons: Seq[MultiPolygon]): Seq[Map[(Int, Int), Int]] that uses the Collections API:
val layerReader = S3CollectionLayerReader("datahub-catalogs-us-east-1", "")
val queryPoly: MultiPolygon = ???
val nlcdLayerId = LayerId("nlcd-2011-30m-epsg5070-int8", 0)
val soilLayerId = LayerId("ssurgo-hydro-groups-30m-epsg5070-int8", 0)
val (nlcdLayer, soilLayer) = {
val fetch1 =
Future {
timeFetch(s" Fetch Time NLCD:") {
baseLayerReader
.query[SpatialKey, Tile, TileLayerMetadata[SpatialKey]](nlcdLayerId)
.where(Intersects(queryPoly)).result
}
}
val fetch2 =
Future {
timeFetch(s" Fetch Time SOIL:") {
baseLayerReader
.query[SpatialKey, Tile, TileLayerMetadata[SpatialKey]](soilLayerId)
.where(Intersects(queryPoly)).result
}
}
val fetches =
for(
(_, layer1) <- fetch1;
(_, layer2) <- fetch2
) yield { (layer1, layer2) }
Await.result(fetches, 10 minutes)
}
histograms(
nlcdLayer,
soilLayer,
queryPoly
)
where histograms is:
def histograms(
layer1: TileLayerCollection[SpatialKey],
layer2: TileLayerCollection[SpatialKey],
polygon: MultiPolygon
): Map[(Int, Int), Long] = {
import spire.syntax.cfor._
val mapTransform = layer1.metadata.layout.mapTransform
val layer1Map = layer1.toMap
val layer2Map = layer2.toMap
val result =
layer1Map.keys.toSet
.intersect(layer2Map.keys.toSet)
.par
.map { k =>
val tileResult = mutable.Map[(Int, Int), Long]()
val (tile1, tile2) = (layer1Map(k), layer2Map(k))
// Assumes tile1.dimensions == tile2.dimensions
val re = RasterExtent(mapTransform(k), tile1.cols, tile1.rows)
if(polygon.contains(re.extent)) {
cfor(0)(_ < re.cols, _ + 1) { col =>
cfor(0)(_ < re.rows, _ + 1) { row =>
val v1 = tile1.get(col, row)
val v2 = tile2.get(col, row)
val k = (v1, v2)
if(!tileResult.contains(k)) { tileResult(k) = 0 }
tileResult(k) += 1
}
}
} else {
polygon.foreach(re, Options(includePartial=true, sampleType=PixelIsArea)) { (col, row) =>
val v1 = tile1.get(col, row)
val v2 = tile2.get(col, row)
val k = (v1, v2)
if(!tileResult.contains(k)) { tileResult(k) = 0 }
tileResult(k) += 1
}
}
tileResult.toMap
}
.reduce { (m1, m2) =>
(m1.toSeq ++ m2.toSeq)
.groupBy(_._1)
.map { case(k, values) => k -> values.map(_._2).sum }
}
result
}
This code provides a starting point for the Collections API optimizations, and is basically an optimized version of
def histograms(nlcd: TileLayerRDD[SpatialKey], soil: TileLayerRDD[SpatialKey], multiPolygons: Seq[MultiPolygon]): Seq[Map[(Int, Int), Int]]that uses the Collections API:where
histogramsis: