Created
September 20, 2024 15:59
-
-
Save spoonerf/5a76e743d603c6e8e15f003b14165226 to your computer and use it in GitHub Desktop.
Download mpox data for Africa from WHO Shiny App
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(httr)
library(rvest)
library(base64enc)
library(readr)
library(dplyr)
library(ggplot2)
#' Download the CSV embedded in the WHO mpox Shiny app page
#'
#' Fetches the page, looks for a `btn-primary` button whose `onclick`
#' attribute carries a `data:text/csv;base64,` data URI, decodes that payload
#' and parses it as CSV.
#'
#' This depends on the current layout of the WHO page and might break if WHO
#' changes the arrangement of their page.
#'
#' @return A data frame as returned by `readr::read_csv()`, or `NULL` (with a
#'   warning) when the page cannot be retrieved or no embedded CSV is found.
get_shiny_data <- function() {
  url <- "https://worldhealthorg.shinyapps.io/mpx_global/#26_Case_definitions"
  csv_marker <- "data:text/csv;base64,"

  response <- GET(url)
  if (status_code(response) != 200) {
    warning(
      "Failed to retrieve the webpage. Status code: ", status_code(response),
      call. = FALSE
    )
    return(NULL)
  }

  page <- read_html(content(response, as = "text", encoding = "UTF-8"))

  # html_node() yields an "xml_missing" object (never NULL) when nothing
  # matches, so collect every candidate button and test the attribute values
  # instead of testing the node for NULL.
  buttons <- html_nodes(
    page,
    xpath = "//button[contains(@class, 'btn-primary')]"
  )
  onclick_values <- html_attr(buttons, "onclick")
  has_csv <- !is.na(onclick_values) &
    grepl(csv_marker, onclick_values, fixed = TRUE)
  if (!any(has_csv)) {
    warning(
      "No button with an embedded base64 CSV was found on the page.",
      call. = FALSE
    )
    return(NULL)
  }
  onclick_value <- onclick_values[has_csv][1]

  # Keep only the run of base64 characters that directly follows the marker;
  # whatever comes after it in the onclick handler is not part of the payload.
  after_marker <- strsplit(onclick_value, csv_marker, fixed = TRUE)[[1]][2]
  if (is.na(after_marker)) {
    warning("The embedded CSV payload is empty.", call. = FALSE)
    return(NULL)
  }
  junk_start <- regexpr("[^A-Za-z0-9+/=]", after_marker)
  base64_data <- if (junk_start > 0) {
    substr(after_marker, 1L, junk_start - 1L)
  } else {
    after_marker
  }
  if (!nzchar(base64_data)) {
    warning("The embedded CSV payload is empty.", call. = FALSE)
    return(NULL)
  }

  csv_bytes <- base64decode(base64_data)
  # rawToChar() errors on embedded NUL bytes, so drop them first.
  csv_bytes <- csv_bytes[csv_bytes != as.raw(0)]

  # NOTE(review): assumes the payload is UTF-8 -- confirm against the app.
  # Invalid byte sequences are dropped, but valid non-ASCII text (e.g.
  # accented country names) is kept rather than being stripped to ASCII.
  csv_text <- iconv(rawToChar(csv_bytes), from = "UTF-8", to = "UTF-8", sub = "")
  csv_text <- sub("^\ufeff", "", csv_text) # remove a leading byte-order mark

  read_csv(I(csv_text), skip_empty_rows = TRUE)
}
# Run the function to get the data
df <- get_shiny_data()
print(df)

# Plot total suspected cases per week for each country, for weeks ending
# after 2023-12-31. Uses dplyr (`%>%`, `filter`) and ggplot2.
# get_shiny_data() returns NULL on failure, so skip the plot in that case
# instead of failing inside the pipeline.
if (!is.null(df)) {
  cases_plot <- df %>%
    filter(week_end_date > as.Date("2023-12-31")) %>%
    ggplot(
      aes(x = week_end_date, y = total_suspected_cases, group = country)
    ) +
    geom_point() +
    facet_wrap(~country, scales = "free")

  # Print explicitly so the plot is rendered even when the value of the
  # enclosing `if` block is not auto-printed.
  print(cases_plot)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment