library(ggplot2)

## Price Histograms with Facet and Color
## Work on the `diamonds` data under the shorter name `dat` and list its columns.
dat <- diamonds
names(dat)
##  [1] "carat"   "cut"     "color"   "clarity" "depth"   "table"   "price"  
##  [8] "x"       "y"       "z"
## Histogram of log(price): one panel per diamond color, bars filled by cut,
## with y-axis breaks at 0, 200, 400 and 600.
ggplot(dat, aes(x = log(price))) +
  geom_histogram(aes(fill = cut)) +
  facet_wrap(~ color) +
  scale_fill_brewer(type = "qual") +
  scale_y_continuous(breaks = seq(from = 0, to = 700, by = 200))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Scatter plot of price against table, colored by cut.
## The view is zoomed to table values 50-80 (coord_cartesian keeps all rows),
## with an x-axis tick every 2 units.
ggplot(dat, aes(x = table, y = price)) +
  geom_point(aes(colour = cut), alpha = 0.5) +
  coord_cartesian(xlim = c(50, 80)) +
  scale_x_continuous(breaks = seq(from = 50, to = 80, by = 2)) +
  scale_colour_brewer(type = "qual")

## Add a volume column as the product of the x, y and z columns, then plot
## price (log10 y axis) against volume, colored by clarity. The x scale is
## limited to the range from 0 to the 99th percentile of volume.
dat$volume <- with(dat, x * y * z)
ggplot(dat, aes(x = volume, y = price)) +
  geom_point(aes(colour = clarity)) +
  scale_y_log10() +
  scale_colour_brewer(type = "div") +
  xlim(0, quantile(dat$volume, probs = 0.99))
## Warning: Removed 540 rows containing missing values (geom_point).

## Jittered points of price per carat for each cut, colored by diamond color,
## with one panel per clarity level.
ggplot(dat, aes(x = cut, y = price / carat)) +
  geom_jitter(aes(colour = color)) +
  facet_wrap(~ clarity) +
  scale_colour_brewer(type = "div") +
  labs(x = "Cut", y = "Price per Carat")

###### pf
## Load the tab-separated pseudo-Facebook data set into `pf`.
## read.delim() is read.csv() with a tab as the default separator.
pf <- read.delim("../lesson3/pseudo_facebook.tsv")

## Compute the proportion of each user's friendships that the user initiated

## prop_initiated = friendships_initiated / friend_count, with 0 substituted
## where friend_count is not positive (avoids dividing by zero).
pf$prop_initiated <- with(
  pf,
  ifelse(friend_count > 0, friendships_initiated / friend_count, 0)
)
str(pf$prop_initiated)
##  num [1:99003] 0 0 0 0 0 0 0 0 0 0 ...
## Derive the joining year from tenure (tenure / 365 subtracted from 2014,
## rounded down), then bin it into the intervals
## (2004,2009], (2009,2011], (2011,2012] and (2012,2014].
pf$year_joined <- floor(2014 - pf$tenure / 365)
pf$year_joined.bucket <- cut(
  pf$year_joined,
  breaks = c(2004, 2009, 2011, 2012, 2014)
)
## Median prop_initiated at each tenure value, drawn as one line per
## year_joined.bucket group.
## `fun` replaces the `fun.y` argument, which is deprecated since ggplot2 3.3.0.
ggplot(pf, aes(x = tenure, y = prop_initiated)) +
  geom_line(aes(color = year_joined.bucket),
            stat = "summary", fun = median)
## Warning: Removed 2 rows containing non-finite values (stat_summary).

## Same median-per-tenure lines, restricted to rows with tenure > 0, with an
## overall smoother added on top (geom_smooth inherits the x/y mapping only,
## so it is not split by year_joined.bucket).
## `fun` replaces the `fun.y` argument, which is deprecated since ggplot2 3.3.0.
ggplot(pf[pf$tenure > 0, ], aes(x = tenure, y = prop_initiated)) +
  geom_line(aes(color = year_joined.bucket),
            stat = "summary", fun = median) +
  geom_smooth()
## Warning: Removed 2 rows containing non-finite values (stat_summary).
## Warning: Removed 2 rows containing non-finite values (stat_smooth).

## Summary statistics of prop_initiated within each year_joined.bucket group.
## The `pf$...` expressions are kept verbatim: by() uses the deparsed INDICES
## expression as the group label in its printed output.
by(data = pf$prop_initiated, INDICES = pf$year_joined.bucket, FUN = summary)
## pf$year_joined.bucket: (2004,2009]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.3415  0.4669  0.4665  0.5909  1.0000 
## -------------------------------------------------------- 
## pf$year_joined.bucket: (2009,2011]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.3894  0.5342  0.5273  0.6744  1.0000 
## -------------------------------------------------------- 
## pf$year_joined.bucket: (2011,2012]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.4500  0.6154  0.5911  0.7600  1.0000 
## -------------------------------------------------------- 
## pf$year_joined.bucket: (2012,2014]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.5000  0.6912  0.6430  0.8438  1.0000