Skip to content

Move to Collections API and optimize count algorithm #47

@lossyrob

Description

@lossyrob

This code provides a starting point for the Collections API optimizations, and is basically an optimized version of def histograms(nlcd: TileLayerRDD[SpatialKey], soil: TileLayerRDD[SpatialKey], multiPolygons: Seq[MultiPolygon]): Seq[Map[(Int, Int), Int]] that uses the Collections API:

val layerReader = S3CollectionLayerReader("datahub-catalogs-us-east-1", "")
val queryPoly: MultiPolygon = ???
val nlcdLayerId = LayerId("nlcd-2011-30m-epsg5070-int8", 0)
val soilLayerId = LayerId("ssurgo-hydro-groups-30m-epsg5070-int8", 0)

val (nlcdLayer, soilLayer) = {
  val fetch1 =
    Future {
      timeFetch(s"  Fetch Time NLCD:") {
        baseLayerReader
          .query[SpatialKey, Tile, TileLayerMetadata[SpatialKey]](nlcdLayerId)
          .where(Intersects(queryPoly)).result
      }
    }

  val fetch2 =
    Future {
      timeFetch(s"  Fetch Time SOIL:") {
        baseLayerReader
          .query[SpatialKey, Tile, TileLayerMetadata[SpatialKey]](soilLayerId)
          .where(Intersects(queryPoly)).result
      }
    }

  val fetches =
    for(
      (_, layer1) <- fetch1;
      (_, layer2) <- fetch2
    ) yield { (layer1, layer2) }

  Await.result(fetches, 10 minutes)
}

histograms(
  nlcdLayer,
  soilLayer,
  queryPoly
)

where histograms is:

def histograms(
  layer1: TileLayerCollection[SpatialKey],
  layer2: TileLayerCollection[SpatialKey],
  polygon: MultiPolygon
): Map[(Int, Int), Long] = {
  import spire.syntax.cfor._

  val mapTransform = layer1.metadata.layout.mapTransform

  val layer1Map = layer1.toMap
  val layer2Map = layer2.toMap

  val result =
    layer1Map.keys.toSet
      .intersect(layer2Map.keys.toSet)
      .par
      .map { k =>
        val tileResult = mutable.Map[(Int, Int), Long]()
        val (tile1, tile2) = (layer1Map(k), layer2Map(k))
        // Assumes tile1.dimensions == tile2.dimensions
        val re = RasterExtent(mapTransform(k), tile1.cols, tile1.rows)

        if(polygon.contains(re.extent)) {
          cfor(0)(_ < re.cols, _ + 1) { col =>
            cfor(0)(_ < re.rows, _ + 1) { row =>
              val v1 = tile1.get(col, row)
              val v2 = tile2.get(col, row)
              val k = (v1, v2)
              if(!tileResult.contains(k)) { tileResult(k) = 0 }
              tileResult(k) += 1
            }
          }
        } else {
          polygon.foreach(re, Options(includePartial=true, sampleType=PixelIsArea)) { (col, row) =>
            val v1 = tile1.get(col, row)
            val v2 = tile2.get(col, row)
            val k = (v1, v2)
            if(!tileResult.contains(k)) { tileResult(k) = 0 }
            tileResult(k) += 1
          }
        }

        tileResult.toMap
      }
      .reduce { (m1, m2) =>
        (m1.toSeq ++ m2.toSeq)
          .groupBy(_._1)
          .map { case(k, values) => k -> values.map(_._2).sum }
      }

  result
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions