Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@
^vignettes/articles$
^CRAN-SUBMISSION$
^data-raw$
^[.]?air[.]toml$
^\.vscode$
5 changes: 5 additions & 0 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"recommendations": [
"Posit.air-vscode"
]
}
10 changes: 10 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"[r]": {
"editor.formatOnSave": true,
"editor.defaultFormatter": "Posit.air-vscode"
},
"[quarto]": {
"editor.formatOnSave": true,
"editor.defaultFormatter": "quarto.quarto"
}
}
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ Config/testthat/parallel: true
Encoding: UTF-8
Language: en-US
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
RoxygenNote: 7.3.3
15 changes: 10 additions & 5 deletions R/compat-purrr.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,16 @@ imap <- function(.x, .f, ...) {
pmap <- function(.l, .f, ...) {
.f <- as.function(.f)
args <- .rlang_purrr_args_recycle(.l)
do.call("mapply", c(
FUN = list(quote(.f)),
args, MoreArgs = quote(list(...)),
SIMPLIFY = FALSE, USE.NAMES = FALSE
))
do.call(
"mapply",
c(
FUN = list(quote(.f)),
args,
MoreArgs = quote(list(...)),
SIMPLIFY = FALSE,
USE.NAMES = FALSE
)
)
}
.rlang_purrr_args_recycle <- function(args) {
lengths <- map_int(args, length)
Expand Down
10 changes: 8 additions & 2 deletions R/encoding.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ html_encoding_guess <- function(x) {
#' @rdname html_encoding_guess
#' @usage NULL
guess_encoding <- function(x) {
lifecycle::deprecate_warn("1.0.0", "guess_encoding()", "html_encoding_guess()")
lifecycle::deprecate_warn(
"1.0.0",
"guess_encoding()",
"html_encoding_guess()"
)
html_encoding_guess(x)
}

Expand All @@ -46,7 +50,9 @@ guess_encoding <- function(x) {
#' @param from The encoding that the string is actually in. If `NULL`,
#' `guess_encoding` will be used.
repair_encoding <- function(x, from = NULL) {
lifecycle::deprecate_warn("1.0.0", "html_encoding_repair()",
lifecycle::deprecate_warn(
"1.0.0",
"html_encoding_repair()",
details = "Instead, re-load using the `encoding` argument of `read_html()`"
)

Expand Down
33 changes: 25 additions & 8 deletions R/form.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ html_form.xml_node <- function(x, base_url = NULL) {

nodes <- html_elements(x, "input, select, textarea, button")
fields <- lapply(nodes, function(x) {
switch(xml2::xml_name(x),
switch(
xml2::xml_name(x),
textarea = parse_textarea(x),
input = parse_input(x),
select = parse_select(x),
Expand All @@ -76,7 +77,8 @@ html_form.xml_node <- function(x, base_url = NULL) {
enctype = enctype,
fields = fields
),
class = "rvest_form")
class = "rvest_form"
)
}

#' @export
Expand Down Expand Up @@ -107,7 +109,9 @@ html_form_set <- function(form, ...) {
if (type == "hidden") {
cli::cli_warn("Setting value of hidden field {.str {field}}.")
} else if (type == "submit") {
cli::cli_abort("Can't change value of input with type submit: {.str {field}}.")
cli::cli_abort(
"Can't change value of input with type submit: {.str {field}}."
)
}

form$fields[[field]]$value <- new_values[[field]]
Expand All @@ -134,12 +138,18 @@ html_form_submit <- function(form, submit = NULL) {
submission_build <- function(form, submit, error_call = caller_env()) {
method <- form$method
if (!(method %in% c("POST", "GET"))) {
cli::cli_warn("Invalid method ({method}), defaulting to GET.", call = error_call)
cli::cli_warn(
"Invalid method ({method}), defaulting to GET.",
call = error_call
)
method <- "GET"
}

if (length(form$action) == 0) {
cli::cli_abort("`form` doesn't contain a `action` attribute.", call = error_call)
cli::cli_abort(
"`form` doesn't contain a `action` attribute.",
call = error_call
)
}

list(
Expand All @@ -158,7 +168,11 @@ submission_submit <- function(x, ...) {
}
}

submission_build_values <- function(form, submit = NULL, error_call = caller_env()) {
submission_build_values <- function(
form,
submit = NULL,
error_call = caller_env()
) {
fields <- form$fields
submit <- submission_find_submit(fields, submit, error_call = error_call)
entry_list <- c(Filter(Negate(is_button), fields), list(submit))
Expand All @@ -171,7 +185,10 @@ submission_build_values <- function(form, submit = NULL, error_call = caller_env
values <- lapply(entry_list, function(x) as.character(x$value))
names <- map_chr(entry_list, "[[", "name")

out <- set_names(unlist(values, use.names = FALSE), rep(names, lengths(values)))
out <- set_names(
unlist(values, use.names = FALSE),
rep(names, lengths(values))
)
as.list(out)
}

Expand Down Expand Up @@ -281,7 +298,7 @@ parse_options <- function(options) {
}

parsed <- lapply(options, parse_option)
value <- map_chr(parsed, "[[", "value")
value <- map_chr(parsed, "[[", "value")
name <- map_chr(parsed, "[[", "name")
selected <- map_lgl(parsed, "[[", "selected")

Expand Down
61 changes: 39 additions & 22 deletions R/live.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@
#' dynamic <- read_html_live("https://www.forbes.com/top-colleges/")
#' # You may need to click the cookie consent banner if it appears
#' dynamic$view()
#'
#'
#' # Now we can find the table
#' dynamic |> html_element("table")
#'
#'
#' # And extract data from it
#' dynamic |>
#' html_element("table") |>
#' dynamic |>
#' html_element("table") |>
#' html_table()
#' }
read_html_live <- function(url) {
Expand Down Expand Up @@ -90,7 +90,9 @@ LiveHTML <- R6::R6Class(
check_installed("chromote")
self$session <- chromote::ChromoteSession$new()

self$session$Network$setUserAgentOverride("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36")
self$session$Network$setUserAgentOverride(
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
)

# https://github.com/rstudio/chromote/issues/102
p <- self$session$Page$loadEventFired(wait_ = FALSE)
Expand Down Expand Up @@ -128,7 +130,6 @@ LiveHTML <- R6::R6Class(
xml2::xml_children(xml2::xml_children(xml2::read_html(html)))
},


#' @description Simulate a click on an HTML element.
#' @param css CSS selector.
#' @param n_clicks Number of clicks
Expand Down Expand Up @@ -282,23 +283,28 @@ LiveHTML <- R6::R6Class(

wait_for_selector = function(css, timeout = 5) {
done <- now() + timeout
while(now() < done) {
while (now() < done) {
nodes <- private$find_nodes(css)
if (length(nodes) > 0) {
return(nodes)
}

Sys.sleep(0.1)
}
cli::cli_abort("Failed to find selector {.str {css}} in {timeout} seconds.")
cli::cli_abort(
"Failed to find selector {.str {css}} in {timeout} seconds."
)
},

find_nodes = function(css, xpath) {
check_exclusive(css, xpath)
if (!missing(css)) {
unlist(self$session$DOM$querySelectorAll(private$root_id(), css)$nodeIds)
unlist(
self$session$DOM$querySelectorAll(private$root_id(), css)$nodeIds
)
} else {
search <- glue::glue("
search <- glue::glue(
"
(function() {{
const xpathResult = document.evaluate('{xpath}', document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
const nodes = [];
Expand All @@ -307,12 +313,18 @@ LiveHTML <- R6::R6Class(
}}
return(nodes);
}})();
")
"
)

object_id <- self$session$Runtime$evaluate(search)$result$objectId
props <- self$session$Runtime$getProperties(object_id, ownProperties = TRUE)
props <- self$session$Runtime$getProperties(
object_id,
ownProperties = TRUE
)

ids <- map_chr(props$result, function(prop) prop$value$objectId %||% NA_character_)
ids <- map_chr(props$result, function(prop) {
prop$value$objectId %||% NA_character_
})
# Drop non-nodes
ids <- ids[!is.na(ids)]

Expand All @@ -338,14 +350,15 @@ LiveHTML <- R6::R6Class(
now <- function() proc.time()[[3]]

#' @export
html_table.LiveHTML <- function(x,
header = NA,
trim = TRUE,
fill = deprecated(),
dec = ".",
na.strings = "NA",
convert = TRUE) {

html_table.LiveHTML <- function(
x,
header = NA,
trim = TRUE,
fill = deprecated(),
dec = ".",
na.strings = "NA",
convert = TRUE
) {
tables <- html_elements(x, "table")
html_table(
tables,
Expand Down Expand Up @@ -390,7 +403,11 @@ has_chromote <- function() {
}


as_key_desc <- function(key, modifiers = character(), error_call = caller_env()) {
as_key_desc <- function(
key,
modifiers = character(),
error_call = caller_env()
) {
check_string(key, call = error_call)
modifiers <- arg_match(
modifiers,
Expand Down
1 change: 0 additions & 1 deletion R/selectors.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,3 @@ make_selector <- function(css, xpath, error_call = caller_env()) {
xpath
}
}

34 changes: 19 additions & 15 deletions R/session.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,15 @@
session <- function(url, ...) {
check_string(url)

session <- structure(
session <- structure(
list(
handle = httr::handle(url),
config = c(..., httr::config(autoreferer = 1L)),
handle = httr::handle(url),
config = c(..., httr::config(autoreferer = 1L)),
response = NULL,
url = NULL,
back = character(),
forward = character(),
cache = new_environment()
url = NULL,
back = character(),
forward = character(),
cache = new_environment()
),
class = "rvest_session"
)
Expand Down Expand Up @@ -217,7 +217,9 @@ read_html.rvest_session <- function(x, ...) {

is_html <- function(x) {
type <- httr::headers(x)$`Content-Type`
if (is.null(type)) return(FALSE)
if (is.null(type)) {
return(FALSE)
}

parsed <- httr::parse_media(type)
parsed$complete %in% c("text/html", "application/xhtml+xml")
Expand All @@ -231,13 +233,15 @@ html_form.rvest_session <- function(x, base_url = NULL) {
}

#' @export
html_table.rvest_session <- function(x,
header = NA,
trim = TRUE,
fill = deprecated(),
dec = ".",
na.strings = "NA",
convert = TRUE) {
html_table.rvest_session <- function(
x,
header = NA,
trim = TRUE,
fill = deprecated(),
dec = ".",
na.strings = "NA",
convert = TRUE
) {
html_table(
read_html(x),
header = header,
Expand Down
Loading
Loading