library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(manipulate)
library(mosaic)
## Loading required package: lattice
## Loading required package: ggformula
##
## New to ggformula? Try the tutorials:
## learnr::run_tutorial("introduction", package = "ggformula")
## learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Loading required package: Matrix
##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
##
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
##
## mean
## The following objects are masked from 'package:dplyr':
##
## count, do, tally
## The following objects are masked from 'package:stats':
##
## binom.test, cor, cor.test, cov, fivenum, IQR, median,
## prop.test, quantile, sd, t.test, var
## The following objects are masked from 'package:base':
##
## max, mean, min, prod, range, sample, sum
library(class)
library(rpart)
library(dplyr)
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(stringr)
library(proxy)
## Warning: package 'proxy' was built under R version 3.4.2
##
## Attaching package: 'proxy'
## The following object is masked from 'package:Matrix':
##
## as.matrix
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
library(ggmap)
library(maps)
library(lubridate)
library(hexbin)
library(datadr)
library(spatialEco)
## spatialEco 0.1-7
## Type se.news() to see new features/changes/bug fixes.
library(rgeos)
## Warning: package 'rgeos' was built under R version 3.4.2
## rgeos version: 0.3-26, (SVN revision 560)
## GEOS runtime version: 3.6.1-CAPI-1.10.1 r0
## Linking to sp version: 1.2-5
## Polygon checking: TRUE
library(sp)
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.4.3
## rgdal: version: 1.2-16, (SVN revision 701)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: FALSE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/proj
## Linking to sp version: 1.2-5
##
## Attaching package: 'rgdal'
## The following object is masked from 'package:mosaic':
##
## project
library(prevR)
## Loading required package: directlabels
##
##
## Welcome to 'prevR': estimate regional trends of a prevalence.
## - type help('prevR') for details
## - type demo(prevR) for a demonstration
## - type citation('prevR') to cite prevR in a publication.
##
##
library(directlabels)
library(RColorBrewer)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:prevR':
##
## export
## The following object is masked from 'package:ggmap':
##
## wind
## The following object is masked from 'package:mosaic':
##
## do
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
theme_black = function(base_size = 12, base_family = "") {
theme_grey(base_size = base_size, base_family = base_family) %+replace%
theme(
# Specify axis options
axis.line = element_blank(),
axis.text.x = element_text(size = base_size*0.8, color = "white", lineheight = 0.9),
axis.text.y = element_text(size = base_size*0.8, color = "white", lineheight = 0.9),
axis.ticks = element_line(color = "white", size = 0.2),
axis.title.x = element_text(size = base_size, color = "white", margin = margin(0, 10, 0, 0)),
axis.title.y = element_text(size = base_size, color = "white", angle = 90, margin = margin(0, 10, 0, 0)),
axis.ticks.length = unit(0.3, "lines"),
# Specify legend options
legend.background = element_rect(color = NA, fill = "black"),
legend.key = element_rect(color = "white", fill = "black"),
legend.key.size = unit(1.2, "lines"),
legend.key.height = NULL,
legend.key.width = NULL,
legend.text = element_text(size = base_size*0.8, color = "white"),
legend.title = element_text(size = base_size*0.8, face = "bold", hjust = 0, color = "white"),
legend.position = "right",
legend.text.align = NULL,
legend.title.align = NULL,
legend.direction = "vertical",
legend.box = NULL,
# Specify panel options
panel.background = element_rect(fill = "black", color = NA),
panel.border = element_rect(fill = NA, color = "white"),
panel.grid.major = element_line(color = "grey35"),
panel.grid.minor = element_line(color = "grey20"),
panel.margin = unit(0.5, "lines"),
# Specify facetting options
strip.background = element_rect(fill = "grey30", color = "grey10"),
strip.text.x = element_text(size = base_size*0.8, color = "white"),
strip.text.y = element_text(size = base_size*0.8, color = "white",angle = -90),
# Specify plot options
plot.background = element_rect(color = "black", fill = "black"),
plot.title = element_text(size = base_size*1.2, color = "white"),
plot.margin = unit(rep(1, 4), "lines")
)
}
taxi_master<- read.csv("/Users/Zack/Desktop/Master_taxi_data2.csv",stringsAsFactors = FALSE)
taxi_master$pickup_datetime <- as_datetime(taxi_master$pickup_datetime )
taxi_master$dropoff_datetime <- as_datetime(taxi_master$dropoff_datetime )
taxi_master_trim <- taxi_master[-c(2, 11)]
taxi_master_trim <- taxi_master_trim %>%
filter(total_amount != 0)
taxi_master_trim <- taxi_master_trim %>%
mutate(tip_perc = tip_amount/total_amount)
taxi_master_trim <- taxi_master_trim %>%
mutate(trip_time_in_mins = difftime(dropoff_datetime,pickup_datetime,units="mins"))
taxi_master_trim <- taxi_master_trim %>%
mutate(trip_time_in_seconds = difftime(dropoff_datetime,pickup_datetime,units="secs"))
taxi_master_trim <- taxi_master_trim %>%
mutate(MPH = trip_distance/(as.numeric(trip_time_in_mins)/60))
taxi_master_trim <- taxi_master_trim %>%
mutate(MPM = trip_distance/(as.numeric(trip_time_in_mins)))
taxi_master_trim <- taxi_master_trim %>%
filter(as.numeric(trip_time_in_seconds) > 15 ) %>%
filter(passenger_count > 0)%>%
filter(MPH < 60)%>%
filter(MPH > 0)
summary(taxi_master_trim)
## X.1 vendor_id pickup_datetime
## Min. : 1 Length:1639322 Min. :2014-01-01 05:01:00
## 1st Qu.: 421909 Class :character 1st Qu.:2014-03-30 00:33:53
## Median : 1469412 Mode :character Median :2014-06-24 18:12:00
## Mean : 2277234 Mean :2014-06-29 03:38:41
## 3rd Qu.: 4078532 3rd Qu.:2014-09-29 04:15:45
## Max. :45448110 Max. :2015-01-01 04:59:00
## dropoff_datetime passenger_count trip_distance
## Min. :2014-01-01 05:04:00 Min. :1.000 Min. : 0.010
## 1st Qu.:2014-03-30 00:46:29 1st Qu.:1.000 1st Qu.: 1.060
## Median :2014-06-24 18:30:03 Median :1.000 Median : 1.800
## Mean :2014-06-29 03:52:05 Mean :1.698 Mean : 2.953
## 3rd Qu.:2014-09-29 04:27:54 3rd Qu.:2.000 3rd Qu.: 3.220
## Max. :2015-01-01 05:21:49 Max. :9.000 Max. :78.540
## pickup_longitude pickup_latitude rate_code dropoff_longitude
## Min. :-94.38 Min. : 0.00 Min. : 0.000 Min. :-111.52
## 1st Qu.:-73.99 1st Qu.:40.73 1st Qu.: 1.000 1st Qu.: -73.99
## Median :-73.98 Median :40.75 Median : 1.000 Median : -73.98
## Mean :-72.59 Mean :39.99 Mean : 1.028 Mean : -72.65
## 3rd Qu.:-73.97 3rd Qu.:40.77 3rd Qu.: 1.000 3rd Qu.: -73.96
## Max. : 0.00 Max. :89.73 Max. :210.000 Max. : 0.00
## dropoff_latitude payment_type fare_amount surcharge
## Min. : 0.00 Length:1639322 Min. : 0.00 Min. : 0.0000
## 1st Qu.: 40.73 Class :character 1st Qu.: 6.50 1st Qu.: 0.0000
## Median : 40.75 Mode :character Median : 9.50 Median : 0.0000
## Mean : 40.02 Mean : 12.58 Mean : 0.3179
## 3rd Qu.: 40.77 3rd Qu.: 14.50 3rd Qu.: 0.5000
## Max. :458.65 Max. :499.99 Max. :10.0000
## mta_tax tip_amount tolls_amount total_amount
## Min. :0.0000 Min. : 0.000 Min. : 0.000 Min. : 1.00
## 1st Qu.:0.5000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 8.00
## Median :0.5000 Median : 1.000 Median : 0.000 Median : 11.50
## Mean :0.4987 Mean : 1.502 Mean : 0.275 Mean : 15.18
## 3rd Qu.:0.5000 3rd Qu.: 2.000 3rd Qu.: 0.000 3rd Qu.: 17.00
## Max. :0.5000 Max. :200.000 Max. :43.500 Max. :499.99
## tip_perc trip_time_in_mins trip_time_in_seconds
## Min. :0.00000 Length:1639322 Length:1639322
## 1st Qu.:0.00000 Class :difftime Class :difftime
## Median :0.10000 Mode :numeric Mode :numeric
## Mean :0.08821
## 3rd Qu.:0.16551
## Max. :1.00000
## MPH MPM
## Min. : 0.00527 Min. :0.0000878
## 1st Qu.: 8.14286 1st Qu.:0.1357143
## Median :11.11111 Median :0.1851852
## Mean :12.48808 Mean :0.2081346
## 3rd Qu.:15.10909 3rd Qu.:0.2518182
## Max. :59.40000 Max. :0.9900000
mean(taxi_master_trim$tip_amount)
## [1] 1.501708
mean(taxi_master_trim$tip_perc)
## [1] 0.08821102
ggplot(data = taxi_master_trim) + geom_point(aes(x = MPH , y =tip_perc), alpha = .01) + labs(x = "Miles per Hour" ,y = "Tip Percentage" , title = "Tip Percentage by MPH" )
tip_regression <- lm(data = taxi_master_trim, tip_perc ~ MPH)
summary(tip_regression)
##
## Call:
## lm(formula = tip_perc ~ MPH, data = taxi_master_trim)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.11016 -0.08702 0.01169 0.07549 0.91478
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.237e-02 1.423e-04 578.80 <2e-16 ***
## MPH 4.678e-04 1.011e-05 46.25 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08393 on 1639320 degrees of freedom
## Multiple R-squared: 0.001303, Adjusted R-squared: 0.001302
## F-statistic: 2139 on 1 and 1639320 DF, p-value: < 2.2e-16
plot(resid(tip_regression))
ggplot(data = taxi_master_trim,aes(x = MPH , y = tip_perc)) + geom_point(aes(x = MPH , y = tip_perc), alpha = .01) + labs(x = "Miles per Hour" ,y = "Tip Percentage" , title = "Tip Percentage by MPH" ) + geom_smooth(method = "lm", se = FALSE)
ggplot(taxi_master_trim, aes(x=pickup_longitude, y=pickup_latitude)) +
geom_point(size=0.03)
min_lat <- 40.5774
max_lat <- 40.9176
min_long <- -74.15
max_long <- -73.7004
plot <- ggplot(taxi_master_trim, aes(x=pickup_longitude, y=pickup_latitude)) +
geom_point(size=0.03) +
scale_x_continuous(limits=c(min_long, max_long)) +
scale_y_continuous(limits=c(min_lat, max_lat))
plot
## Warning: Removed 30945 rows containing missing values (geom_point).
min_lat <- 40.6
max_lat <- 40.9
min_long <- -74.1
max_long <- -73.75
ggplot(taxi_master_trim, aes(x=pickup_longitude, y=pickup_latitude)) +
geom_point(size=0.03,alpha=.008) +
scale_x_continuous(limits=c(min_long, max_long)) +
scale_y_continuous(limits=c(min_lat, max_lat))
## Warning: Removed 31102 rows containing missing values (geom_point).
ggplot(taxi_master_trim, aes(x=pickup_longitude, y=pickup_latitude)) +
scale_x_continuous(limits=c(min_long, max_long)) +
scale_y_continuous(limits=c(min_lat, max_lat)) + geom_hex(aes(fill="#000000"),fill="Forest Green", bins = 300)
## Warning: Removed 31102 rows containing non-finite values (stat_binhex).
## Warning: Removed 2 rows containing missing values (geom_hex).
ggplot(taxi_master_trim, aes(x=pickup_longitude, y=pickup_latitude)) +
scale_x_continuous(limits=c(min_long, max_long)) +
scale_y_continuous(limits=c(min_lat, max_lat)) + geom_hex(aes(fill="#000000",alpha=log(..count..)),fill="Forest Green", bins = 300)
## Warning: Removed 31102 rows containing non-finite values (stat_binhex).
## Warning: Removed 2 rows containing missing values (geom_hex).
min_lat <- 40.6
max_lat <- 40.9
min_long <- -74.1
max_long <- -73.75
hbin <- hexbin(taxi_master_trim$pickup_longitude,taxi_master_trim$pickup_latitude, xbins = 40)
ggplot(taxi_master_trim, aes(x=pickup_longitude, y=pickup_latitude)) +
scale_x_continuous(limits=c(min_long, max_long)) +
scale_y_continuous(limits=c(min_lat, max_lat)) + geom_hex(aes(fill="#000000",alpha=log(..count..)),fill="Forest Green", bins = 300) + theme_black() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "white"))
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead
## Warning: Removed 31102 rows containing non-finite values (stat_binhex).
## Warning: Removed 2 rows containing missing values (geom_hex).
taxi_hex <- taxi_master_trim %>%
filter(pickup_longitude > -74.1) %>%
filter(pickup_longitude < -73.75) %>%
filter(pickup_latitude > 40.6) %>%
filter(pickup_latitude < 40.9)
hbin <- hexbin(taxi_hex$pickup_longitude,taxi_hex$pickup_latitude, xbins = 400)
plot2 <- plot(hbin, trans = log, inv = exp, xlab = "longitude", ylab = "latitude", colramp = heat.ob)
plot2
## $plot.vp
## An object of class "hexVP"
## Slot "hexVp.on":
## viewport[zkeqx.on]
##
## Slot "hexVp.off":
## viewport[zkeqx.off]
##
## Slot "mar":
## [1] 5.1lines 7.6lines 4.1lines 5.6lines
##
## Slot "fig":
## [1] 0.828571428571429npc 0.714285714285714npc
##
## Slot "plt":
## [1] 0.451428571428571npc 0.451428571428571npc
##
## Slot "xscale":
## [1] -74.11639 -73.73256
##
## Slot "yscale":
## [1] 40.58499 40.91420
##
## Slot "shape":
## [1] 1
##
## Slot "hp.name":
## character(0)
##
##
## $legend.vp
## viewport[GRID.VP.89]
plot <- ggplot(taxi_master_trim, aes(x=pickup_longitude, y=pickup_latitude, z=total_amount)) +
stat_summary_hex(fun = mean, bins=350, alpha=1) +
scale_x_continuous(limits=c(min_long, max_long)) +
scale_y_continuous(limits=c(min_lat, max_lat)) +
scale_fill_gradient(low="#CCCCCC", high="#27AE60") +
coord_equal()
plot
## Warning: Removed 31102 rows containing non-finite values
## (stat_summary_hex).
taxi_master_trim <- taxi_master_trim %>%
mutate(Dollars_per_min = total_amount/as.numeric(trip_time_in_mins))
plot <- ggplot(taxi_master_trim, aes(x=pickup_longitude, y=pickup_latitude, z=Dollars_per_min)) +
stat_summary_hex(fun = mean, bins=200, alpha=1) +
scale_x_continuous(limits=c(min_long, max_long)) +
scale_y_continuous(limits=c(min_lat, max_lat)) +
scale_fill_gradient(low="#FFFD86", high="#000000") +
coord_equal()
plot
## Warning: Removed 31102 rows containing non-finite values
## (stat_summary_hex).
min_lat <- 40.6
max_lat <- 40.9
min_long <- -74.1
max_long <- -73.75
ny.map <- readOGR("/Users/Zack/Desktop/ZillowNeighborhoods-NY", layer="ZillowNeighborhoods-NY")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/Zack/Desktop/ZillowNeighborhoods-NY", layer: "ZillowNeighborhoods-NY"
## with 579 features
## It has 5 fields
ny <- ny.map[ny.map$CITY == "New York"]
plot(ny, xlim=c(min_long,max_long), ylim =c(min_lat,max_lat))
points(taxi_master_trim$pickup_longitude,taxi_master_trim$pickup_latitude ,pch = 20,cex = .1,col = alpha("black", 1))
dat <- data.frame(Longitude = taxi_master_trim$pickup_longitude, Latitude = taxi_master_trim$pickup_latitude ,names = as.character(taxi_master_trim$X))
coordinates(dat) <- ~ Longitude + Latitude
proj4string(dat)
## [1] NA
proj4string(ny)
## [1] "+proj=longlat +datum=NAD83 +no_defs +ellps=GRS80 +towgs84=0,0,0"
#<- CRS(as.character(NA))
#tf_list <- point.in.SpatialPolygons(taxi_master_trim$pickup_longitude, taxi_master_trim$pickup_latitude, ny.map)
#taxi_master_trim <- taxi_master_trim %>%
# mutate(tf = tf_list)
#taxi_master_trim <- taxi_master_trim %>%
# filter(tf == TRUE)
taxi_master_trim<- read.csv("/Users/Zack/Desktop/Master_taxi_data_trim_tf.csv",stringsAsFactors = FALSE)
plot(ny.map, xlim=c(min_long,max_long), ylim =c(min_lat,max_lat))
points(taxi_master_trim$pickup_longitude,taxi_master_trim$pickup_latitude ,pch = 20,cex = .1,col = alpha("black", 1))
plot <- ggplot(taxi_master_trim, aes(x=pickup_longitude, y=pickup_latitude, z=Dollars_per_min)) +
stat_summary_hex(fun = mean, bins=150, alpha=1) +
scale_x_continuous(limits=c(min_long, max_long)) +
scale_y_continuous(limits=c(min_lat, max_lat)) +
scale_fill_gradient(low="#FFFD86", high="#000000") + coord_equal()
plot
## Warning: Removed 141 rows containing non-finite values (stat_summary_hex).
## Warning: Removed 3 rows containing missing values (geom_hex).
head(filter(taxi_master_trim, Dollars_per_min > 100))
## X X.1 vendor_id pickup_datetime dropoff_datetime
## 1 39714 396296 CMT 2014-01-22 21:56:49 2014-01-22 21:59:05
## 2 86387 561336 CMT 2014-01-02 14:18:41 2014-01-02 14:20:13
## 3 98243 464200 CMT 2014-01-04 11:09:49 2014-01-04 11:10:17
## 4 113252 246523 CMT 2014-01-27 11:42:04 2014-01-27 11:42:39
## 5 148975 171090 CMT 2014-02-19 20:27:20 2014-02-19 20:27:50
## 6 181375 584286 CMT 2014-02-22 00:09:08 2014-02-22 00:12:04
## passenger_count trip_distance pickup_longitude pickup_latitude rate_code
## 1 1 0.4 -73.80183 40.66007 5
## 2 1 0.8 -73.99975 40.73270 5
## 3 1 0.1 -73.97546 40.74882 2
## 4 2 0.3 -73.95491 40.71698 5
## 5 1 0.1 -73.96996 40.79730 2
## 6 1 1.2 -73.97436 40.64485 5
## dropoff_longitude dropoff_latitude payment_type fare_amount surcharge
## 1 -73.80562 40.66058 NOC 250.0 0
## 2 -74.00116 40.73331 CRD 175.0 0
## 3 -73.97632 40.74938 CRD 52.0 0
## 4 -73.95489 40.71695 CRD 45.5 0
## 5 -73.97120 40.79536 NOC 52.0 0
## 6 -73.98529 40.65921 CRD 299.0 0
## mta_tax tip_amount tolls_amount total_amount tip_perc trip_time_in_mins
## 1 0.0 0.00 0.00 250.00 0.0000000 2.2666667
## 2 0.0 0.00 0.00 175.00 0.0000000 1.5333333
## 3 0.5 10.00 5.33 67.83 0.1474274 0.4666667
## 4 0.0 13.65 0.00 59.15 0.2307692 0.5833333
## 5 0.5 0.00 0.00 52.50 0.0000000 0.5000000
## 6 0.0 0.00 0.00 299.00 0.0000000 2.9333333
## trip_time_in_seconds MPH MPM Dollars_per_min tf ytd
## 1 136 10.58824 0.1764706 110.2941 TRUE 22
## 2 92 31.30435 0.5217391 114.1304 TRUE 2
## 3 28 12.85714 0.2142857 145.3500 TRUE 4
## 4 35 30.85714 0.5142857 101.4000 TRUE 27
## 5 30 12.00000 0.2000000 105.0000 TRUE 50
## 6 176 24.54545 0.4090909 101.9318 TRUE 53
taxi_master_trim <- taxi_master_trim %>%
filter(trip_time_in_mins > 3)
taxi_master_trim <- taxi_master_trim %>%
filter(Dollars_per_min < 30)
plot <- ggplot(taxi_master_trim, aes(x=pickup_longitude, y=pickup_latitude, z=Dollars_per_min)) +
stat_summary_hex(fun = mean, bins=120, alpha=1) +
scale_x_continuous(limits=c(min_long, max_long)) +
scale_y_continuous(limits=c(min_lat, max_lat)) +
scale_fill_gradient(low="#FFFD86", high="#000000") + coord_equal() + coord_equal() + labs(x = NULL ,y = NULL , title = "Taxi Dollars Per Minute" ) + theme(axis.ticks = element_blank(),axis.text.x=element_blank(),
axis.text.y=element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black")) + labs(fill='$/min')
plot
## Warning: Removed 120 rows containing non-finite values (stat_summary_hex).
## Warning: Removed 3 rows containing missing values (geom_hex).
mean(taxi_master_trim$Dollars_per_min)
## [1] 1.184451
plot <- ggplot(taxi_master_trim, aes(x=pickup_longitude, y=pickup_latitude, z=total_amount)) +
stat_summary_hex(fun = sum, bins=83, alpha=.9) +
scale_x_continuous(limits=c(-74.025, -73.9)) +
scale_y_continuous(limits=c(40.69, 40.83)) +
scale_fill_gradient(low="#FFFD86", high="#000000",trans = "sqrt", breaks=c(1000,40000,160000),labels=c("Low","Mid","High")) + coord_equal() + labs(x = NULL ,y = NULL , title = "Taxi Revenue Across New York") + theme(axis.ticks = element_blank(),axis.text.x=element_blank(),
axis.text.y=element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black")) + labs(fill='Revenue')
plot
## Warning: Removed 83832 rows containing non-finite values
## (stat_summary_hex).
## Warning: Removed 30 rows containing missing values (geom_hex).
taxi_master_trim_tipa <- filter(taxi_master_trim,tip_perc < .5)
mean(taxi_master_trim_tipa$tip_perc)
## [1] 0.0889925
ggplot(data = taxi_master_trim_tipa) + geom_histogram(aes(x = tip_perc),binwidth = .05, fill = "yellow")+ scale_x_continuous(limits = c(-.05, .5)) + geom_vline(aes(xintercept=mean(taxi_master_trim_tipa$tip_perc)),
linetype="dashed", color = "black")+
ggtitle("Tip Percentages in NYC Taxis") +
labs(y="Count", x = "Tip Percent")
## Warning: Removed 2 rows containing missing values (geom_bar).
ggplot(taxi_master_trim, aes(x=pickup_longitude, y=pickup_latitude)) +
scale_x_continuous(limits=c(-74.025, -73.9)) +
scale_y_continuous(limits=c(40.69, 40.83)) + geom_hex(aes(fill="#000000",alpha=log(..count..,20)),fill="Forest Green", bins = 500) + theme_black() + labs(x = NULL ,y = NULL , title = "Concentration of NYC Taxi Trips") + coord_equal() + labs(alpha = "Log of Concentration")
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead
## Warning: Removed 83832 rows containing non-finite values (stat_binhex).
## Warning: Removed 18 rows containing missing values (geom_hex).
weather <- read.csv("/Users/Zack/Desktop/csweather.csv")
taxi_master_trim <- taxi_master_trim %>%
mutate(ytd = yday(taxi_master_trim$pickup_datetime))
head(weather)
## Day Temp.avg..F. Wind.avg..mph. Percip.sum..in. Rain Snow Fog X X.1
## 1 1/1/14 29 14 0 NA NA
## 2 1/2/14 26 21 0.33 yes yes NA NA
## 3 1/3/14 14 21 0.29 NA NA
## 4 1/4/14 19 9 0 NA NA
## 5 1/5/14 34 7 0.14 yes yes NA NA
## 6 1/6/14 37 18 0.36 yes yes NA NA
## X.2 X.3 X.4
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
weather$Day <- as.Date(weather$Day, "%m/%d/%y")
weather$Day <- as_datetime(weather$Day)
weather <- weather %>%
mutate(ytd = yday(weather$Day))
weather <- weather[-c(8,9,10,11,12)]
taxi_master_trim_weather <- merge(x = weather, y = taxi_master_trim, by = "ytd", all.x=TRUE)
data_new <- count(taxi_master_trim_weather, ytd,Temp.avg..F.)
data_new
## # A tibble: 365 x 3
## ytd Temp.avg..F. n
## <dbl> <int> <int>
## 1 1 29 4387
## 2 2 26 3183
## 3 3 14 2329
## 4 4 19 3548
## 5 5 34 3989
## 6 6 37 3338
## 7 7 12 3979
## 8 8 16 4195
## 9 9 27 4287
## 10 10 34 4553
## # ... with 355 more rows
data_new_snow <- count(filter(taxi_master_trim_weather, Snow == "yes"), ytd,Temp.avg..F.)
data_new_snow
## # A tibble: 22 x 3
## ytd Temp.avg..F. n
## <dbl> <int> <int>
## 1 2 26 3183
## 2 10 34 4553
## 3 25 24 4958
## 4 29 19 4298
## 5 34 38 3600
## 6 36 32 3970
## 7 40 26 4889
## 8 44 30 3263
## 9 45 36 3872
## 10 46 32 4699
## # ... with 12 more rows
data_new_rain <- count(filter(taxi_master_trim_weather, Snow == "", Rain == "yes"), ytd,Temp.avg..F.)
data_new_rain
## # A tibble: 104 x 3
## ytd Temp.avg..F. n
## <dbl> <int> <int>
## 1 5 34 3989
## 2 6 37 3338
## 3 11 48 4838
## 4 12 46 4529
## 5 14 48 4009
## 6 50 40 3984
## 7 51 44 4254
## 8 52 43 4617
## 9 71 44 4342
## 10 78 39 4361
## # ... with 94 more rows
data_new_neither <- count(filter(taxi_master_trim_weather, Snow == "", Rain == ""), ytd,Temp.avg..F.)
data_new_neither
## # A tibble: 239 x 3
## ytd Temp.avg..F. n
## <dbl> <int> <int>
## 1 1 29 4387
## 2 3 14 2329
## 3 4 19 3548
## 4 7 12 3979
## 5 8 16 4195
## 6 9 27 4287
## 7 13 44 3658
## 8 15 40 4255
## 9 16 38 4360
## 10 17 39 4593
## # ... with 229 more rows
tbl_snow <- data_new_snow %>%
group_by(Temp.avg..F.) %>%
summarise(mean = mean(n))
tbl_rain <- data_new_rain %>%
group_by(Temp.avg..F.) %>%
summarise(mean = mean(n))
tbl_neither <- data_new_neither %>%
group_by(Temp.avg..F.) %>%
summarise(mean = mean(n))
ggplot(data = tbl_rain) + geom_point(aes(x = Temp.avg..F. , y =mean, color = "Rain" ), alpha = 1) + labs(x = "Degrees F" ,y = "Trip Volume" , title = "Trip Volume v.s. Weather" ) + geom_point(data = tbl_snow , aes(x = Temp.avg..F. , y =mean, color = "Snow" ), alpha = 1) +
geom_point(data = tbl_neither , aes(x = Temp.avg..F. , y =mean, color = "Neither" ), alpha = 1) +
scale_color_manual(name = "Weather", values=c(Rain = "blue", Snow ="purple",
Neither ="orange"))
whole_year <- data.frame(
DateTime=taxi_master_trim$pickup_datetime,
time=format(as.POSIXct(taxi_master_trim$pickup_datetime, format="%Y-%m-%d %H:%M"), format="%H")
)
one_AM <- sum(whole_year$time == '01', na.rm = TRUE)
two_AM <- sum(whole_year$time == '02', na.rm = TRUE)
three_AM <- sum(whole_year$time == '03', na.rm = TRUE)
four_AM <- sum(whole_year$time == '04', na.rm = TRUE)
five_AM <- sum(whole_year$time == '05', na.rm = TRUE)
six_AM <- sum(whole_year$time == '06', na.rm = TRUE)
seven_AM <- sum(whole_year$time == '07', na.rm = TRUE)
eight_AM <- sum(whole_year$time == '08', na.rm = TRUE)
nine_AM <- sum(whole_year$time == '09', na.rm = TRUE)
ten_AM <- sum(whole_year$time == '10', na.rm = TRUE)
eleven_AM <- sum(whole_year$time == '11', na.rm = TRUE)
twelve_PM <- sum(whole_year$time == '12', na.rm = TRUE) #Noon
one_PM <- sum(whole_year$time == '13', na.rm = TRUE)
two_PM <- sum(whole_year$time == '14', na.rm = TRUE)
three_PM <- sum(whole_year$time == '15', na.rm = TRUE)
four_PM <- sum(whole_year$time == '16', na.rm = TRUE)
five_PM <- sum(whole_year$time == '17', na.rm = TRUE)
six_PM <- sum(whole_year$time == '18', na.rm = TRUE)
seven_PM <- sum(whole_year$time == '19', na.rm = TRUE)
eight_PM <- sum(whole_year$time == '20', na.rm = TRUE)
nine_PM <- sum(whole_year$time == '21', na.rm = TRUE)
ten_PM <- sum(whole_year$time == '22', na.rm = TRUE)
eleven_PM <- sum(whole_year$time == '23', na.rm = TRUE)
twelve_AM <- sum(whole_year$time == '00', na.rm = TRUE) #Midnight
hour_labels <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)
all_the_hours <- c(twelve_AM, one_AM, two_AM, three_AM, four_AM, five_AM, six_AM, seven_AM, eight_AM, nine_AM, ten_AM, eleven_AM, twelve_PM, one_PM, two_PM, three_PM, four_PM, five_PM, six_PM, seven_PM, eight_PM, nine_PM, ten_PM, eleven_PM)
whole_year <- data.frame(hour_labels, all_the_hours)
each_month <- data.frame(
DateTime=taxi_master_trim$pickup_datetime,
time=format(as.POSIXct(taxi_master_trim$pickup_datetime, format="%Y-%m-%d %H:%M"), format="%m"),
time_hour=format(as.POSIXct(taxi_master_trim$pickup_datetime, format="%Y-%m-%d %H:%M"), format="%H")
)
#Month: JAN
month_JAN <- each_month %>% filter(time == '01')
JAN_hours <- group_by(month_JAN, time_hour)
total_JAN_hours <- summarise(JAN_hours, num = n())
total_JAN_hours <- data.frame(total_JAN_hours)
#Month: FEB
month_FEB <- each_month %>% filter(time == '02')
FEB_hours <- group_by(month_FEB, time_hour)
total_FEB_hours <- summarise(FEB_hours, num = n())
#Month: MAR
month_MAR <- each_month %>% filter(time == '03')
MAR_hours <- group_by(month_MAR, time_hour)
total_MAR_hours <- summarise(MAR_hours, num = n())
#Month: APR
month_APR <- each_month %>% filter(time == '04')
APR_hours <- group_by(month_APR, time_hour)
total_APR_hours <- summarise(APR_hours, num = n())
#Month: MAY
month_MAY <- each_month %>% filter(time == '05')
MAY_hours <- group_by(month_MAY, time_hour)
total_MAY_hours <-summarise(MAY_hours, num = n())
#Month: JUN
month_JUN <- each_month %>% filter(time == '06')
JUN_hours <- group_by(month_JUN, time_hour)
total_JUN_hours <-summarise(JUN_hours, num = n())
#Month: JUL
month_JUL <- each_month %>% filter(time == '07')
JUL_hours <- group_by(month_JUL, time_hour)
total_JUL_hours <- summarise(JUL_hours, num = n())
#Month: AUG
month_AUG <- each_month %>% filter(time == '08')
AUG_hours <- group_by(month_AUG, time_hour)
total_AUG_hours <-summarise(AUG_hours, num = n())
#Month: SEP
month_SEP <- each_month %>% filter(time == '09')
SEP_hours <- group_by(month_SEP, time_hour)
total_SEP_hours <-summarise(SEP_hours, num = n())
#Month: OCT
month_OCT <- each_month %>% filter(time == '10')
OCT_hours <- group_by(month_OCT, time_hour)
total_OCT_hours <-summarise(OCT_hours, num = n())
#Month: NOV
month_NOV <- each_month %>% filter(time == '11')
NOV_hours <- group_by(month_NOV, time_hour)
total_NOV_hours <-summarise(NOV_hours, num = n())
#Month: DEC
month_DEC <- each_month %>% filter(time == '12')
DEC_hours <- group_by(month_DEC, time_hour)
total_DEC_hours <- summarise(DEC_hours, num = n())
all_the_months <- c(total_JAN_hours, total_FEB_hours, total_MAR_hours, total_APR_hours, total_MAY_hours, total_JUN_hours, total_JUL_hours, total_AUG_hours, total_SEP_hours, total_OCT_hours, total_NOV_hours, total_DEC_hours)
monthly_data_frame <- data.frame(hour_labels, all_the_months)
monthly_data_frame$time_hour <- NULL
monthly_data_frame$time_hour.1 <- NULL
monthly_data_frame$time_hour.2 <- NULL
monthly_data_frame$time_hour.3 <- NULL
monthly_data_frame$time_hour.4 <- NULL
monthly_data_frame$time_hour.5 <- NULL
monthly_data_frame$time_hour.6 <- NULL
monthly_data_frame$time_hour.7 <- NULL
monthly_data_frame$time_hour.8 <- NULL
monthly_data_frame$time_hour.9 <- NULL
monthly_data_frame$time_hour.10 <- NULL
monthly_data_frame$time_hour.11 <- NULL
monthly_data_frame$time_hour.12 <- NULL
plot(hour_labels,monthly_data_frame$num, xlab = "HOUR OF THE DAY", ylab = "AMOUNT OF TRIPS", type = 'l', col = 'green', main = 'NUMBER OF RIDES PER HOUR OF EACH MONTH')
par(new=TRUE)
plot(hour_labels,monthly_data_frame$num.1, yaxt='n', ann=FALSE, type = 'l', col = 'green')
par(new=TRUE)
plot(hour_labels,monthly_data_frame$num.2, yaxt='n', ann=FALSE, type = 'l', col = 'green')
par(new=TRUE)
plot(hour_labels,monthly_data_frame$num.3, yaxt='n', ann=FALSE, type = 'l', col = 'green')
par(new=TRUE)
plot(hour_labels,monthly_data_frame$num.4, yaxt='n', ann=FALSE, type = 'l', col = 'green')
par(new=TRUE)
plot(hour_labels,monthly_data_frame$num.5, yaxt='n', ann=FALSE, type = 'l', col = 'green')
par(new=TRUE)
plot(hour_labels,monthly_data_frame$num.6, yaxt='n', ann=FALSE, type = 'l', col = 'green')
par(new=TRUE)
plot(hour_labels,monthly_data_frame$num.7, yaxt='n', ann=FALSE, type = 'l', col = 'green')
par(new=TRUE)
plot(hour_labels,monthly_data_frame$num.8, yaxt='n', ann=FALSE, type = 'l', col = 'green')
par(new=TRUE)
plot(hour_labels,monthly_data_frame$num.9, yaxt='n', ann=FALSE, type = 'l', col = 'green')
par(new=TRUE)
plot(hour_labels,monthly_data_frame$num.10, yaxt='n', ann=FALSE, type = 'l', col = 'red')
par(new=TRUE)
plot(hour_labels,monthly_data_frame$num.11, yaxt='n', ann=FALSE, type = 'l', col = 'red')
par(new=TRUE)
plot(hour_labels,all_the_hours, yaxt='n', ann=FALSE, type = 'l', lwd=2)
legend("bottomright", c("Year-Round","Holidays", "Other Months"),cex=.8,
col=c("black","red", "green"),pch=c(1))
whole_year <- data.frame(
DateTime=taxi_master_trim$pickup_datetime,
time=format(as.POSIXct(taxi_master_trim$pickup_datetime, format="%Y-%m-%d %H:%M"), format="%H")
)
weekday_taxi <- whole_year %>% mutate(weekday = wday(DateTime, label = TRUE)) %>% filter(!wday(DateTime) %in% c(1,7))
weekend_taxi <- whole_year %>% mutate(weekday = wday(DateTime, label = TRUE)) %>% filter(!wday(DateTime) %in% c(2,3,4,5,6))
weekday_weekend <- rbind(weekday_taxi, weekend_taxi)
#Hour One:
weekdays_summed_one <- sum(weekday_weekend$time == '01', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Two:
weekdays_summed_two <- sum(weekday_weekend$time == '02', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Three:
weekdays_summed_three <- sum(weekday_weekend$time == '03', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Four:
weekdays_summed_four <- sum(weekday_weekend$time == '04', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Five:
weekdays_summed_five <- sum(weekday_weekend$time == '05', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Six:
weekdays_summed_six <- sum(weekday_weekend$time == '06', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Seven:
weekdays_summed_seven <- sum(weekday_weekend$time == '07', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Eight:
weekdays_summed_eight <- sum(weekday_weekend$time == '08', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Nine:
weekdays_summed_nine <- sum(weekday_weekend$time == '09', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Ten:
weekdays_summed_ten <- sum(weekday_weekend$time == '10', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Eleven:
weekdays_summed_eleven <- sum(weekday_weekend$time == '11', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Twelve (NOON)
weekdays_summed_twelve <- sum(weekday_weekend$time == '12', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour One PM (13):
weekdays_summed_thirteen <- sum(weekday_weekend$time == '13', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Two PM (14):
weekdays_summed_fourteen <- sum(weekday_weekend$time == '14', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Three PM (15):
weekdays_summed_fifteen <- sum(weekday_weekend$time == '15', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Four PM (16):
weekdays_summed_sixteen <- sum(weekday_weekend$time == '16', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Five PM (17):
weekdays_summed_seventeen <- sum(weekday_weekend$time == '17', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Six PM (18):
weekdays_summed_eighteen <- sum(weekday_weekend$time == '18', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Seven PM (19):
weekdays_summed_nineteen <- sum(weekday_weekend$time == '19', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Eight PM (20):
weekdays_summed_twenty <- sum(weekday_weekend$time == '20', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Nine PM (21):
weekdays_summed_twentyone <- sum(weekday_weekend$time == '21', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Ten PM (22):
weekdays_summed_twentytwo <- sum(weekday_weekend$time == '22', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Eleven PM (23):
weekdays_summed_twentythree <- sum(weekday_weekend$time == '23', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour Midnight (00):
weekdays_summed_twentyfour <- sum(weekday_weekend$time == '00', weekday_weekend$weekday == 'Mon', weekday_weekend$weekday == 'Tue', weekday_weekend$weekday == 'Wed', weekday_weekend$weekday == 'Thu', weekday_weekend$weekday == 'Fri', na.rm = TRUE)
#Hour One:
weekends_summed_one <- sum(weekday_weekend$time == '01', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Two:
weekends_summed_two <- sum(weekday_weekend$time == '02', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Three:
weekends_summed_three <- sum(weekday_weekend$time == '03', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Four:
weekends_summed_four <- sum(weekday_weekend$time == '04', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Five:
weekends_summed_five <- sum(weekday_weekend$time == '05', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Six:
weekends_summed_six <- sum(weekday_weekend$time == '06',weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Seven:
weekends_summed_seven <- sum(weekday_weekend$time == '07',weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Eight:
weekends_summed_eight <- sum(weekday_weekend$time == '08', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Nine:
weekends_summed_nine <- sum(weekday_weekend$time == '09',weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Ten:
weekends_summed_ten <- sum(weekday_weekend$time == '10',weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Eleven:
weekends_summed_eleven <- sum(weekday_weekend$time == '11',weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Twelve (NOON)
weekends_summed_twelve <- sum(weekday_weekend$time == '12',weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour One PM (13):
weekends_summed_thirteen <- sum(weekday_weekend$time == '13', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Two PM (14):
weekends_summed_fourteen <- sum(weekday_weekend$time == '14', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Three PM (15):
weekends_summed_fifteen <- sum(weekday_weekend$time == '15', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Four PM (16):
weekends_summed_sixteen <- sum(weekday_weekend$time == '16', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Five PM (17):
weekends_summed_seventeen <- sum(weekday_weekend$time == '17', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Six PM (18):
weekends_summed_eighteen <- sum(weekday_weekend$time == '18', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Seven PM (19):
weekends_summed_nineteen <- sum(weekday_weekend$time == '19', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Eight PM (20):
weekends_summed_twenty <- sum(weekday_weekend$time == '20', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Nine PM (21):
weekends_summed_twentyone <- sum(weekday_weekend$time == '21', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Ten PM (22):
weekends_summed_twentytwo <- sum(weekday_weekend$time == '22',weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Eleven PM (23):
weekends_summed_twentythree <- sum(weekday_weekend$time == '23', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
#Hour Midnight (00):
weekends_summed_twentyfour <- sum(weekday_weekend$time == '00', weekday_weekend$weekday == 'Sat', weekday_weekend$weekday == 'Sun', na.rm = TRUE)
weekdays_combined <- c(weekdays_summed_twentyfour, weekdays_summed_one, weekdays_summed_two, weekdays_summed_three, weekdays_summed_four, weekdays_summed_five, weekdays_summed_six, weekdays_summed_seven, weekdays_summed_eight, weekdays_summed_nine, weekdays_summed_ten, weekdays_summed_eleven, weekdays_summed_twelve, weekdays_summed_thirteen, weekdays_summed_fourteen, weekdays_summed_fifteen, weekdays_summed_sixteen,weekdays_summed_seventeen, weekdays_summed_eighteen, weekdays_summed_nineteen, weekdays_summed_twenty, weekdays_summed_twentyone, weekdays_summed_twentytwo, weekdays_summed_twentythree)
weekdays_combined <- data.frame(weekdays_combined)
colnames(weekdays_combined) <- "Weekday"
row.names(weekdays_combined) <- c("00", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23")
weekends_combined <- c(weekends_summed_twentyfour, weekends_summed_one, weekends_summed_two, weekends_summed_three, weekends_summed_four, weekends_summed_five, weekends_summed_six, weekends_summed_seven, weekends_summed_eight, weekends_summed_nine, weekends_summed_ten, weekends_summed_eleven, weekends_summed_twelve, weekends_summed_thirteen, weekends_summed_fourteen, weekends_summed_fifteen, weekends_summed_sixteen, weekends_summed_seventeen, weekends_summed_eighteen, weekends_summed_nineteen, weekends_summed_twenty, weekends_summed_twentyone, weekends_summed_twentytwo, weekends_summed_twentythree)
weekends_combined <- data.frame(weekends_combined)
colnames(weekends_combined) <- "Weekend"
row.names(weekends_combined) <- c("00", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23")
weekdays_weekends_combined <- c(weekdays_combined, weekends_combined)
weekdays_weekends_combined <- data.frame(hour_labels, weekdays_weekends_combined)
plot(weekdays_weekends_combined$Weekday, ylab = "NUMBER OF RIDES", xlab = "HOUR", type = 'o', col = 'pink', main = 'NUMBER OF RIDES PER HOUR - WEEKDAY')
plot(weekdays_weekends_combined$Weekend, ylab = "NUMBER OF RIDES", xlab = "HOUR", type = 'o', col = 'salmon', main = 'NUMBER OF RIDES PER HOUR - WEEKEND')