igraph
options(conflicts.policy = list(warn.conflicts = FALSE))
library(tidyverse)
library(igraph)
# Build the example data: one random G(n, p) graph per id (n = id, p = 0.3)
# plus a uniform random payload. The seed makes the example reproducible.
set.seed(42)
data <- tibble(
  id = 10:30,
  graph = map(id, \(n_vertices) sample_gnp(n = n_vertices, p = 0.3)),
  payload = runif(n = length(id)),
)
# igraph objects cannot be stored in the Parquet file directly, so replace
# each graph with its edge list (a data frame) and drop the graph column.
parquet_data <-
  data |>
  mutate(
    edgelists = map(graph, \(g) igraph::as_data_frame(g))
  ) |>
  select(!graph)

# FIXME: unclear whether duckplyr can write this table; arrow is used for now.
arrow::write_parquet(parquet_data, "igraph.parquet")
# Open the Parquet file as a lazy duckplyr table
lazy_data <- duckplyr::read_parquet_duckdb("igraph.parquet")

# Materialize in memory only the columns needed for the igraph step
igraph_computation_input <- collect(select(lazy_data, id, edgelists))
# Rebuild each graph from its edge list and count its connected components.
# graph_from_data_frame() builds a directed graph unless told otherwise, while
# the graphs were generated by sample_gnp() with its default (undirected), so
# directed = FALSE is passed to restore the original graph type.
# NOTE(review): an edge list does not record isolated vertices, so any such
# vertices are missing from the rebuilt graphs -- confirm this is acceptable.
igraph_result <-
  igraph_computation_input |>
  mutate(
    graph = map(
      edgelists,
      \(edges) graph_from_data_frame(edges, directed = FALSE)
    ),
    # count_components() matches the column name; diameter() would measure
    # the longest shortest path instead. as.integer() keeps map_int() happy
    # because igraph returns the count as a double.
    num_components = map_int(graph, \(g) as.integer(count_components(g)))
  ) |>
  select(id, num_components)
# Join the in-memory igraph results back onto the lazy duckplyr table
lazy_data_with_result <- left_join(lazy_data, igraph_result, by = "id")

# Show the query plan for the combined pipeline
explain(lazy_data_with_result)
#> ┌───────────────────────────┐
#> │ PROJECTION │
#> │ ──────────────────── │
#> │ id │
#> │ payload │
#> │ edgelists │
#> │ num_components │
#> │ │
#> │ ~21 rows │
#> └─────────────┬─────────────┘
#> ┌─────────────┴─────────────┐
#> │ HASH_JOIN │
#> │ ──────────────────── │
#> │ Join Type: LEFT │
#> │ │
#> │ Conditions: ├──────────────┐
#> │ id_x IS NOT DISTINCT FROM │ │
#> │ id_y │ │
#> │ │ │
#> │ ~21 rows │ │
#> └─────────────┬─────────────┘ │
#> ┌─────────────┴─────────────┐┌─────────────┴─────────────┐
#> │ READ_PARQUET ││ R_DATAFRAME_SCAN │
#> │ ──────────────────── ││ ──────────────────── │
#> │ Function: ││ Text: data.frame │
#> │ READ_PARQUET ││ │
#> │ ││ Projections: │
#> │ Projections: ││ id │
#> │ id ││ num_components │
#> │ payload ││ │
#> │ edgelists ││ │
#> │ ││ │
#> │ ~21 rows ││ ~21 rows │
#> └───────────────────────────┘└───────────────────────────┘
Created on 2026-02-22 with reprex v2.1.1
nested
library(tidyverse)
# Example tibble covering several kinds of nesting:
#   a: plain integer column
#   b: list_of character vectors of growing length
#   c: packed data frame column (c1, c2)
#   d: list_of tibbles with a growing number of rows
#   e: list_of tibbles whose column x is itself a list_of tibbles
data <- tibble(
  a = 1:5,
  b = vctrs::as_list_of(
    map(1:5, \(k) letters[seq_len(k)])
  ),
  c = tibble(c1 = 11:15, c2 = 21:25),
  d = vctrs::as_list_of(
    map(
      1:5,
      \(k) tibble(x1 = rev(seq_len(k)), x2 = LETTERS[seq_len(k)])
    )
  ),
  e = vctrs::as_list_of(
    map(
      1:5,
      \(k) tibble(
        x = vctrs::as_list_of(
          map(1:3, \(j) tibble(z = vctrs::list_of(1, 2)))
        )
      )
    )
  ),
)
data
#> # A tibble: 5 × 5
#> a b c$c1 $c2 d e
#> <int> <list<chr>> <int> <int> <list<tibble[,2]>> <list<tibble[,1]>>
#> 1 1 [1] 11 21 [1 × 2] [3 × 1]
#> 2 2 [2] 12 22 [2 × 2] [3 × 1]
#> 3 3 [3] 13 23 [3 × 2] [3 × 1]
#> 4 4 [4] 14 24 [4 × 2] [3 × 1]
#> 5 5 [5] 15 25 [5 × 2] [3 × 1]
# Drill into the innermost level: row 3 of e, first element of x, column z
data[["e"]][[3]][["x"]][[1]][["z"]]
#> <list_of<double>[2]>
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 2
# Try to write the nested tibble to Parquet via duckplyr. The recorded output
# below shows this call failing: the list_of column `b` cannot be converted
# to relational.
duckplyr::compute_parquet(data, "nested-duckdb.parquet")
#> Error in `duckplyr::compute_parquet()`:
#> ! Can't convert columns of class <vctrs_list_of/vctrs_vctr/list> to
#> relational. Affected column: `b`.
Created on 2026-02-22 with reprex v2.1.1
igraph
Created on 2026-02-22 with reprex v2.1.1
nested
Created on 2026-02-22 with reprex v2.1.1