error messages for missing arguments for ggplot2

48 Views Asked by At
Warning messages:
1: In min(x) : no non-missing arguments to min; returning Inf
2: In max(x) : no non-missing arguments to max; returning -Inf
3: In min(diff(sort(x))) : no non-missing arguments to min; returning Inf
4: Removed 7687 rows containing non-finite values (`stat_count()`). 

I am trying to create a stacked bar chart of influenza serotypes by month, but I cannot get past the warning messages above and I am unsure what I am missing.

library(tidyverse)
library(dplyr)
library(lubridate)

library(ggplot2)
library(ggthemes)

# Set the TZ environment variable for this session and record the current date.
# `today` is not used anywhere else in this snippet.
Sys.setenv(TZ="America/New_York")
today <- Sys.Date()

# Read the tab-separated file with a header row. check.names=FALSE keeps the
# column names exactly as written; fill=TRUE pads rows that have fewer fields.
flu2018_file <- "2018_flu.txt"
df_flu2018 <- as.data.frame(read.table(flu2018_file, sep="\t", header=TRUE, check.names=FALSE, fill=TRUE))

# Parse the `date` column with mdy() (month-day-year order) and floor each
# value to the first day of its month.
# NOTE(review): the sample rows in this post show `date` values such as
# "2018/01/02" (year first) and a `month` column that is entirely NA.
df_flu2018$month <- floor_date(as_date(mdy(df_flu2018$date)), unit="month")

# Count rows per `month`, with bars filled by `serotype`.
plot_1 <- ggplot(df_flu2018) + 
     geom_histogram(stat="count", aes(x = month, fill = serotype)) 

plot_1

Below is the output of dput(head(df_flu2018)):

# First six rows of df_flu2018, as printed by dput(head(df_flu2018)).
# Note that `date` holds year-first strings such as "2018/01/02", and that
# `month` is a Date vector in which every element is NA.
df_flu2018 <- structure(list(accession = c("MH083301", "MH083303", "MH083309", 
"MH083311", "MH081888", "MH081892"), length = c(1752L, 1433L, 
1752L, 1433L, 1441L, 1737L), host = c("Human", "Human", "Human", 
"Human", "Human", "Human"), segment = c("4 (HA)", "6 (NA)", "4 (HA)", 
"6 (NA)", "6 (NA)", "4 (HA)"), serotype = c("H1N1", "H1N1", "H1N1", 
"H1N1", "H3N2", "H3N2"), country = c("USA", "USA", "USA", "USA", 
"USA", "USA"), region = c("N", "N", "N", "N", "N", "N"), date = c("2018/01/02", 
"2018/01/02", "2018/01/03", "2018/01/03", "2018/01/03", "2018/01/03"
), name = c("Influenza A virus (A/Alabama/01/2018(H1N1))", "Influenza A virus (A/Alabama/01/2018(H1N1))", 
"Influenza A virus (A/Alabama/02/2018(H1N1))", "Influenza A virus (A/Alabama/02/2018(H1N1))", 
"Influenza A virus (A/Alabama/03/2018(H3N2))", "Influenza A virus (A/Alabama/03/2018(H3N2))"
), mutations = c("", "", "", "", "", ""), age = c(NA, NA, NA, 
NA, NA, NA), gender = c(NA, NA, NA, NA, NA, NA), lineage = c(NA, 
NA, NA, NA, NA, NA), vac_strain = c(NA, NA, NA, NA, NA, NA), 
    fulllength_plus = c("c", "c", "c", "c", "c", "c"), month = structure(c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), class = "Date")), row.names = c(NA, 
6L), class = "data.frame")
1

There is 1 best solution below

0
stefan On

The issue is that when creating your dates you use mdy(), which expects dates in month-day-year order, whereas the date column in your dataset is in a year-first format (YYYY/MM/DD or YYYY/DD/MM). As a result you end up with a month vector consisting only of NAs (missing values), which is what triggers the "no non-missing arguments" warnings:

# Reproduce the problem: mdy() expects month-day-year order, but `date` holds
# year-first strings, so parsing fails and every element of `month` is NA.
df_flu2018$month <- floor_date(
  mdy(df_flu2018$date),
  unit = "month"
)
#> Warning: All formats failed to parse. No formats found.

df_flu2018$month
#> [1] NA NA NA NA NA NA

To fix that, use the conversion function that matches the order of the date components in your data, i.e. ymd() or ydm(). For example, assuming that your dates are in the format YYYY/MM/DD:

library(ggplot2)
library(lubridate)

# Parse the year-first date strings with ymd(), then floor each date to the
# first day of its month so all records from one month share an x value.
df_flu2018[["month"]] <- floor_date(ymd(df_flu2018[["date"]]), unit = "month")

# Stacked bar chart: one bar per month, with segments filled by serotype.
# geom_bar() counts the rows for each x value by default.
ggplot(df_flu2018, aes(x = month, fill = serotype)) +
  geom_bar()