library(ggplot2)
## Price Histograms with Facet and Color
# Work on a copy of the `diamonds` data set and list its column names.
dat <- diamonds
colnames(dat)
## [1] "carat" "cut" "color" "clarity" "depth" "table" "price"
## [8] "x" "y" "z"
# Histogram of log(price): bars filled by cut, one panel per diamond color.
# No bin width is given, so ggplot2 falls back to its default binning.
ggplot(data = dat, mapping = aes(x = log(price))) +
  geom_histogram(mapping = aes(fill = cut)) +
  scale_fill_brewer(type = "qual") +
  facet_wrap(~color) +
  scale_y_continuous(breaks = seq(from = 0, to = 700, by = 200))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Price against table, colored by cut. Points are semi-transparent to
# reduce overplotting; coord_cartesian() zooms the x axis to 50-80
# without discarding any observations.
ggplot(data = dat, mapping = aes(x = table, y = price)) +
  geom_point(mapping = aes(colour = cut), alpha = 0.5) +
  scale_colour_brewer(type = "qual") +
  scale_x_continuous(breaks = seq(from = 50, to = 80, by = 2)) +
  coord_cartesian(xlim = c(50, 80))
# Approximate each diamond's volume as the product of its x, y and z
# dimensions, then plot price (log10 scale) against volume, colored by
# clarity. xlim() drops the points above the 99th volume percentile,
# which is what triggers the "Removed ... rows" warning.
dat$volume <- with(dat, x * y * z)
ggplot(data = dat, mapping = aes(x = volume, y = price)) +
  geom_point(mapping = aes(colour = clarity)) +
  xlim(0, quantile(dat$volume, probs = 0.99)) +
  scale_colour_brewer(type = "div") +
  scale_y_log10()
## Warning: Removed 540 rows containing missing values (geom_point).
# Price per carat for each cut, jittered to spread overlapping points,
# colored by diamond color and split into one panel per clarity grade.
ggplot(data = dat, mapping = aes(x = cut, y = price / carat)) +
  geom_jitter(mapping = aes(colour = color)) +
  scale_colour_brewer(type = "div") +
  facet_wrap(~clarity) +
  labs(x = "Cut", y = "Price per Carat")
###### pf
# Load the tab-separated pseudo-Facebook data set. read.delim() is
# read.csv() with the separator preset to a tab.
pf <- read.delim("../lesson3/pseudo_facebook.tsv")
## Compute the proportion of each user's friendships that the user initiated
# friendships_initiated / friend_count per user. Rows whose friend_count
# is not positive get 0, which avoids dividing by zero.
pf$prop_initiated <- with(
  pf,
  ifelse(friend_count > 0, friendships_initiated / friend_count, 0)
)
str(pf$prop_initiated)
## num [1:99003] 0 0 0 0 0 0 0 0 0 0 ...
# Derive the year each user joined by counting back from 2014
# (presumably tenure is measured in days, hence the division by 365 --
# confirm against the data dictionary), then bin the years into the
# right-closed cohorts (2004,2009], (2009,2011], (2011,2012], (2012,2014].
pf$year_joined <- floor(2014 - pf$tenure / 365)
pf$year_joined.bucket <- cut(
  pf$year_joined,
  breaks = c(2004, 2009, 2011, 2012, 2014)
)
# Median prop_initiated at each tenure value, one line per join-year
# cohort.
# The summary function is passed as `fun`: the older `fun.y` name was
# deprecated in ggplot2 3.3.0 and is not recognised when forwarded
# through a geom layer, in which case the plot silently falls back to
# the default summary instead of the median.
ggplot(data = pf, mapping = aes(x = tenure, y = prop_initiated)) +
  geom_line(
    mapping = aes(color = year_joined.bucket),
    stat = "summary",
    fun = median
  )
## Warning: Removed 2 rows containing non-finite values (stat_summary).
# Same median-by-tenure lines as the previous plot, restricted to users
# with positive tenure, plus an overall smoother. geom_smooth() has no
# color mapping of its own, so it fits a single curve across all cohorts.
# The summary function is passed as `fun`: the older `fun.y` name was
# deprecated in ggplot2 3.3.0 and is not recognised when forwarded
# through a geom layer, in which case the plot silently falls back to
# the default summary instead of the median.
# NOTE(review): a logical index containing NA yields all-NA rows, so any
# user with missing tenure survives this filter as an empty row and is
# only dropped later by ggplot2 (with a warning). Consider
# subset(pf, tenure > 0) if those rows should be excluded up front.
ggplot(
  data = pf[pf$tenure > 0, ],
  mapping = aes(x = tenure, y = prop_initiated)
) +
  geom_line(
    mapping = aes(color = year_joined.bucket),
    stat = "summary",
    fun = median
  ) +
  geom_smooth()
## Warning: Removed 2 rows containing non-finite values (stat_summary).
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
# Five-number summary (plus mean) of prop_initiated for each join-year
# cohort. The INDICES expression is kept as `pf$year_joined.bucket`
# because by() uses its deparsed text as the printed group label.
by(
  data = pf$prop_initiated,
  INDICES = pf$year_joined.bucket,
  FUN = summary
)
## pf$year_joined.bucket: (2004,2009]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.3415 0.4669 0.4665 0.5909 1.0000
## --------------------------------------------------------
## pf$year_joined.bucket: (2009,2011]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.3894 0.5342 0.5273 0.6744 1.0000
## --------------------------------------------------------
## pf$year_joined.bucket: (2011,2012]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.4500 0.6154 0.5911 0.7600 1.0000
## --------------------------------------------------------
## pf$year_joined.bucket: (2012,2014]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.5000 0.6912 0.6430 0.8438 1.0000