Hans Rosling’s 200 Countries, 200 Years, 4 Minutes - The Joy of Stats - BBC Four
Why do you use data graphics, or why have you used them in the past?
Source: https://mtstateintrostats.github.io/IntroStatTextbook/index.html
R Shiny provides a way to create interactive visualizations and web applets.
There are two key components of an R Shiny script: the user interface (`ui`) and the `server` function.
# Server logic for the histogram app.
# `input` carries the widget values from the UI (here the "bins" slider) and
# `output` receives the rendered plot under the id "distPlot".
server <- function(input, output) {
  output$distPlot <- renderPlot({
    # Second column of the built-in `faithful` data set.
    # NOTE: the variable stays named `x` because hist() builds its default
    # title and axis label from the expression it is given.
    x <- faithful[, 2]

    # input$bins bins need input$bins + 1 equally spaced break points
    x_range <- range(x)
    bins <- seq(x_range[1], x_range[2], length.out = input$bins + 1)

    # Draw the histogram with the requested number of bins
    hist(x, breaks = bins, col = "darkgray", border = "white")
  })
}
# Static version of the app's histogram: the slider value is replaced by a
# fixed constant so the plotting code can be run outside of Shiny.
input.bins <- 30 # number of bins

# Second column of the built-in `faithful` data set (kept as `x` because
# hist() uses the variable name in its default title and axis label).
x <- faithful[, 2]

# input.bins bins need input.bins + 1 equally spaced break points
x_range <- range(x)
bins <- seq(x_range[1], x_range[2], length.out = input.bins + 1)

# Draw the histogram with the specified number of bins
hist(x, breaks = bins, col = "darkgray", border = "white")
# User interface for the histogram app: a title, a sidebar holding the
# bin-count slider, and a main panel that displays the plot the server
# stores under the id "distPlot".
ui <- fluidPage(
  # Application title
  titlePanel(title = "Old Faithful Geyser Data"),

  sidebarLayout(
    # Sidebar with a slider input for the number of bins; its value is read
    # on the server side as input$bins
    sidebarPanel(
      sliderInput(
        inputId = "bins",
        label = "Number of bins:",
        min = 1,
        max = 50,
        value = 30
      )
    ),

    # Show a plot of the generated distribution
    mainPanel(
      plotOutput(outputId = "distPlot")
    )
  )
)
# Server logic for the histogram app.
# `input` carries the widget values from the UI (here the "bins" slider) and
# `output` receives the rendered plot under the id "distPlot".
server <- function(input, output) {
  output$distPlot <- renderPlot({
    # Second column of the built-in `faithful` data set.
    # NOTE: the variable stays named `x` because hist() builds its default
    # title and axis label from the expression it is given.
    x <- faithful[, 2]

    # input$bins bins need input$bins + 1 equally spaced break points
    x_range <- range(x)
    bins <- seq(x_range[1], x_range[2], length.out = input$bins + 1)

    # Draw the histogram with the requested number of bins
    hist(x, breaks = bins, col = "darkgray", border = "white")
  })
}

# Run the application by pairing the UI definition with the server logic
shinyApp(ui = ui, server = server)
Open the default Shiny Web App in R that contains the interactive histogram of Old Faithful eruptions. Change the color of the histogram and reload the app.
Open the default Shiny document R Markdown file and change the `system.file()` call so that it uses a different embedded Shiny application example than “06_tabsets”.
The `babynames` package contains a (giant) dataset called `babynames`, which contains the full baby name data provided by the Social Security Administration (SSA). Type the following to explore the data set and its variables:
# Explore the SSA baby-name data shipped with the babynames package.
# You will most likely have to install the package first:
#   install.packages("babynames")
library(babynames)

# Load data into environment
data(babynames)

# Open the help page describing the data set and its variables
help("babynames")

# Show the first few rows
head(babynames)
Using the `babynames` data set, implement your own version of this app from the SSA.
# Download the bike rental data used in the plotting examples below.
# read_csv() comes from readr, which must already be attached.
bike.data <- read_csv(
  file = "http://www.math.montana.edu/ahoegh/teaching/stat408/datasets/Bike.csv"
)
# Add calendar year and month, stored as factors, derived from the
# `datetime` column. year() and month() are presumably the lubridate helpers.
bike.data <- mutate(
  bike.data,
  year = as.factor(year(datetime)),
  month = as.factor(month(datetime))
)

# Total rentals per calendar month (all years pooled), ordered by month
monthly.counts <- bike.data %>%
  group_by(month) %>%
  summarize(num_bikes = sum(count), .groups = "drop") %>%
  arrange(month)

# Print the summary table
monthly.counts
## # A tibble: 12 × 2
##    month num_bikes
##    <fct>     <dbl>
##  1 1         79884
##  2 2         99113
##  3 3        133501
##  4 4        167402
##  5 5        200147
##  6 6        220733
##  7 7        214617
##  8 8        213516
##  9 9        212529
## 10 10       207434
## 11 11       176440
## 12 12       160160
# Bar chart of total rentals per month (ggplot2).
# geom_col() draws bar heights straight from the y values, i.e. it is the
# shorthand for geom_bar(stat = "identity").
ggplot(monthly.counts, aes(x = month, y = num_bikes)) +
  geom_col() +
  labs(
    x = "Month",
    y = "Bike Rentals",
    title = "Bike Rentals per Month in 2011-2012 \n Capital Bikeshare in Washington, DC",
    caption = "Source: www.capitalbikeshare.com"
  )
# Stacked bar chart of monthly rentals split by rider type (base graphics).

# Sum casual and registered rentals within each month. Naming the columns
# makes the result self-describing: month, casual, registered.
bike.counts <- aggregate(
  cbind(casual = bike.data$casual, registered = bike.data$registered),
  by = list(month = bike.data$month),
  FUN = sum
)

# barplot() stacks the rows of a matrix within each bar, so the counts are
# transposed to (rider type x month). The bar labels are taken from
# bike.counts itself, so they always line up with the bars and this chunk
# no longer depends on monthly.counts or on dplyr.
barplot(
  t(as.matrix(bike.counts[, -1])),
  names.arg = as.character(bike.counts[[1]]),
  xlab = "Month",
  sub = "Source: www.capitalbikeshare.com",
  ylab = "Bike Rentals",
  main = "Bike Rentals per Month in 2011 - 2012 \n Capital Bikeshare in Washington, DC",
  col = c("darkblue", "red"),
  legend.text = c("Casual", "Registered"),
  args.legend = list(x = "topleft")
)
# Scatter plot of total rentals per month (base graphics).

# The first column of bike.counts holds the month as a factor (month was
# created with as.factor()). Plotting y ~ factor makes plot() draw one box
# plot per level and ignore pch, so the month labels are converted to
# numbers to get the intended filled points.
month_num <- as.numeric(as.character(bike.counts[, 1]))

# Total rentals per month across all rider-type columns
monthly_total <- rowSums(bike.counts[, -1])

plot(
  monthly_total ~ month_num,
  xlab = "Month",
  sub = "Source: www.capitalbikeshare.com",
  ylab = "Bike Rentals",
  main = "Bike Rentals per Month \n Capital Bikeshare in Washington, DC",
  col = c("darkblue"),
  pch = 16,
  axes = FALSE,
  ylim = c(0, max(monthly_total))
)

# Axes are drawn by hand so that every month gets its own tick mark
axis(2)
axis(1, at = month_num)
box()
# Mean temperature per month.
# `month` is a factor in bike.data; as.numeric() on a factor returns its
# integer level codes. Those codes are used as x positions for the line
# below, which is drawn on top of a plot whose x axis is the month factor.
mean_temp <- bike.data %>%
  group_by(month) %>%
  summarize(mean_temp = mean(temp), .groups = "drop") %>%
  mutate(month = as.numeric(month))

# Jittered, semi-transparent hourly temperatures per month, overlaid with a
# red line through the monthly means. The line layer uses its own data and
# aesthetics, hence inherit.aes = FALSE.
ggplot(bike.data, aes(x = month, y = temp)) +
  geom_jitter(alpha = .1) +
  geom_line(
    data = mean_temp,
    mapping = aes(x = month, y = mean_temp),
    inherit.aes = FALSE,
    color = "red",
    lwd = 2
  ) +
  labs(
    x = "Month",
    y = "Average Temp (C)",
    title = "Average Temperature in Washington, DC",
    caption = "Source: www.capitalbikeshare.com"
  )
# Hourly rentals against temperature, with a loess smoother (base graphics).

# Convert temperature from Celsius to Fahrenheit
bike.data$tempF <- bike.data$temp * 1.8 + 32

# Very transparent dark-red points (alpha 10 out of 255) so that dense
# regions of the scatter show up darker
plot(
  x = bike.data$tempF,
  y = bike.data$count,
  pch = 16,
  col = rgb(100, 0, 0, 10, maxColorValue = 255),
  ylab = "Hourly Bike Rentals",
  xlab = "Temp (F)",
  sub = "Source: www.capitalbikeshare.com",
  main = "Hourly Bike Rentals by Temperature"
)

# Fit a loess curve and evaluate it on a one-degree grid spanning the
# observed temperatures
bike.fit <- loess(count ~ tempF, bike.data)
temp.seq <- seq(min(bike.data$tempF), max(bike.data$tempF))
lines(x = temp.seq, y = predict(bike.fit, temp.seq), lwd = 2)
# 2 x 2 panel of hourly rentals against four weather variables.
# Panels are filled column by column (mfcol); the outer bottom margin leaves
# room for a single shared source note. The previous graphics settings are
# saved here and restored at the end, instead of being overwritten with
# hard-coded values.
old_par <- par(mfcol = c(2, 2), oma = c(1, 0, 0, 0))

# Convert temperature from Celsius to Fahrenheit
bike.data$tempF <- bike.data$temp * 1.8 + 32

# Panel 1: temperature, with loess smoother
plot(
  x = bike.data$tempF,
  y = bike.data$count,
  pch = 16,
  col = rgb(100, 0, 0, 10, maxColorValue = 255),
  ylab = "Hourly Bike Rentals",
  xlab = "Temp (F)",
  main = "Hourly Bike Rentals by Temperature"
)
bike.fit <- loess(count ~ tempF, bike.data)
temp.seq <- seq(min(bike.data$tempF), max(bike.data$tempF))
lines(x = temp.seq, y = predict(bike.fit, temp.seq), lwd = 2)

# Panel 2: humidity, with loess smoother
plot(
  x = bike.data$humidity,
  y = bike.data$count,
  pch = 16,
  col = rgb(100, 0, 100, 10, maxColorValue = 255),
  ylab = "Hourly Bike Rentals",
  xlab = "Humidity (%)",
  main = "Hourly Bike Rentals by Humidity"
)
bike.fit <- loess(count ~ humidity, bike.data)
humidity.seq <- seq(min(bike.data$humidity), max(bike.data$humidity))
lines(x = humidity.seq, y = predict(bike.fit, humidity.seq), lwd = 2)

# Panel 3: wind speed, with loess smoother
plot(
  x = bike.data$windspeed,
  y = bike.data$count,
  pch = 16,
  col = rgb(0, 0, 100, 10, maxColorValue = 255),
  ylab = "Hourly Bike Rentals",
  xlab = "Windspeed (MPH)",
  main = "Hourly Bike Rentals by Windspeed"
)
bike.fit <- loess(count ~ windspeed, bike.data)
windspeed.seq <- seq(min(bike.data$windspeed), max(bike.data$windspeed))
lines(x = windspeed.seq, y = predict(bike.fit, windspeed.seq), lwd = 2)

# Panel 4: weather category. The x variable is a factor, so plot() draws
# one box plot per category.
plot(
  bike.data$count ~ as.factor(bike.data$weather),
  col = rgb(0, 100, 0, 255, maxColorValue = 255),
  ylab = "Hourly Bike Rentals",
  xlab = "Weather Conditions",
  main = "Hourly Bike Rentals by Weather"
)

# Shared source note in the outer bottom margin
mtext("Source: www.capitalbikeshare.com", outer = TRUE, cex = .9, side = 1)

# Restore the layout and margins that were in effect before this figure
par(old_par)
# Density-scaled histogram of temperature in Fahrenheit
hist(
  bike.data$tempF,
  probability = TRUE,
  main = "Temperature (F)",
  col = "red",
  xlab = ""
)
# Stacked histograms comparing temperature with relative temperature.
# The two-row layout is set here and the previous graphics settings are
# restored afterwards so later plots are not silently drawn two per page.
old_par <- par(mfrow = c(2, 1))

# Convert the `atemp` column (plotted below as "Relative Temperature") from
# Celsius to Fahrenheit
bike.data$reltempF <- bike.data$atemp * 1.8 + 32

# Both panels share one x range so the distributions are directly comparable
temp_xlim <- c(0, max(c(bike.data$reltempF, bike.data$tempF)))

# breaks = "FD" selects the Freedman-Diaconis rule for the bin width
hist(
  bike.data$tempF,
  probability = TRUE,
  breaks = "FD",
  main = "Temperature (F)",
  col = "red",
  xlab = "",
  xlim = temp_xlim
)
hist(
  bike.data$reltempF,
  probability = TRUE,
  breaks = "FD",
  main = "Relative Temperature (F)",
  col = "orange",
  xlab = "",
  xlim = temp_xlim
)

# Restore the layout that was in effect before this figure
par(old_par)