fbpx
Select Page

R Visualizations- Part 2

R Visualizations – ggplot2  (PART-2)

 

  1. Distribution

Studying how and where data points are distributed is very important when working with large amounts of data.

Histogram

Histogram plot  – continuous variable

# Histograms of engine displacement (mpg$displ), stacked by vehicle class.
library(ggplot2)
theme_set(theme_classic())

# Base plot shared by both histograms below.
graph <- ggplot(mpg, aes(displ)) + scale_fill_brewer(palette = "Spectral")

# Bin width fixed at 0.1 displacement units.
graph +
  geom_histogram(aes(fill = class), binwidth = .1, col = "black", size = .1) +
  labs(title = "Histogram with Auto Binning",
       subtitle = "Engine Displacement across Vehicle Classes")

# Exactly five bins across the displacement range.
graph +
  geom_histogram(aes(fill = class), bins = 5, col = "black", size = .1) +
  labs(title = "Histogram with Fixed Bins",
       subtitle = "Engine Displacement across Vehicle Classes")

 

Histogram plot  — categorical variable

# Bar chart ("histogram" of a categorical variable): number of models per
# manufacturer, stacked by vehicle class.
library(ggplot2)
theme_set(theme_classic())

graph <- ggplot(mpg, aes(manufacturer))

graph +
  geom_bar(aes(fill = class), width = 0.5) +
  # Tilt the manufacturer names so they do not overlap.
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
  labs(title = "Histogram on Categorical Variable",
       subtitle = "Manufacturer across Vehicle Classes")

 

Density plot

# Density of city mileage (mpg$cty), one filled curve per cylinder count.
library(ggplot2)
theme_set(theme_classic())

graph <- ggplot(mpg, aes(cty))

graph +
  # alpha < 1 keeps overlapping densities visible.
  geom_density(aes(fill = factor(cyl)), alpha = 0.8) +
  labs(title = "Density plot",
       subtitle = "City Mileage Grouped by Number of cylinders",
       caption = "Source: mpg",
       x = "City Mileage",
       fill = "# Cylinders")

 

Box Plot

A box plot is an excellent tool for studying how data is distributed. It shows the basic summary statistics of the data in a single plot.

# Box plot of city mileage for each vehicle class.
library(ggplot2)
theme_set(theme_classic())

graph <- ggplot(mpg, aes(class, cty))

graph +
  # varwidth = TRUE (spelled out; `T` can be reassigned) sizes each box by
  # the number of observations in its group.
  geom_boxplot(varwidth = TRUE, fill = "plum") +
  labs(title = "Box plot",
       subtitle = "City Mileage grouped by Class of vehicle",
       caption = "Source: mpg",
       x = "Class of Vehicle",
       y = "City Mileage")

 

# Box plot of city mileage per vehicle class, split further by cylinder count.
library(ggthemes)

graph <- ggplot(mpg, aes(class, cty))

graph +
  geom_boxplot(aes(fill = factor(cyl))) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
  labs(title = "Box plot",
       subtitle = "City Mileage grouped by Class of vehicle",
       caption = "Source: mpg",
       x = "Class of Vehicle",
       y = "City Mileage")

 

Dot + Box Plot

# Box plot with the individual observations overlaid as centred dots.
library(ggplot2)
theme_set(theme_bw())

graph <- ggplot(mpg, aes(manufacturer, cty))

graph +
  geom_boxplot() +
  # Bin along y and stack symmetrically around each box's centre line.
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = .5, fill = "red") +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
  # NOTE(review): x is mapped to `manufacturer`, yet the labels below say
  # "Class" — confirm which wording is intended.
  labs(title = "Box plot + Dot plot",
       subtitle = "City Mileage vs Class: Each dot represents 1 row in source data",
       caption = "Source: mpg",
       x = "Class of Vehicle",
       y = "City Mileage")

 

Tufte Boxplot

# Minimal-ink (Tufte-style) box plot of city mileage per manufacturer.
library(ggthemes)
library(ggplot2)
theme_set(theme_tufte())

graph <- ggplot(mpg, aes(manufacturer, cty))

graph +
  geom_tufteboxplot() +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
  labs(title = "Tufte Styled Boxplot",
       subtitle = "City Mileage grouped by Class of vehicle",
       caption = "Source: mpg",
       x = "Class of Vehicle",
       y = "City Mileage")

For Free, Demo classes Call: 8605110150
Registration Link: Click Here!

Violin Plot

# Violin plot of city mileage for each vehicle class.
library(ggplot2)
theme_set(theme_bw())

graph <- ggplot(mpg, aes(class, cty))

graph +
  geom_violin() +
  labs(title = "Violin plot",
       subtitle = "City Mileage vs Class of vehicle",
       caption = "Source: mpg",
       x = "Class of Vehicle",
       y = "City Mileage")

Population Pyramid

# Population-pyramid style bar chart of an email campaign funnel.
library(ggplot2)
library(ggthemes)
options(scipen = 999)  # favour fixed over scientific notation when printing

email_campaign <- read.csv("https://raw.githubusercontent.com/selva86/datasets/master/email_campaign_funnel.csv")

# Axis breaks every 5 million from -15m to 15m; the labels show magnitudes
# only, so both sides of the pyramid read as positive counts.
brks <- seq(-15000000, 15000000, 5000000)
lbls <- paste0(as.character(c(seq(15, 0, -5), seq(5, 15, 5))), "m")

ggplot(email_campaign, aes(x = Stage, y = Users, fill = Gender)) +
  geom_bar(stat = "identity", width = .6) +
  scale_y_continuous(breaks = brks, labels = lbls) +
  coord_flip() +  # stages on the vertical axis
  labs(title = "Email Campaign ") +
  theme_tufte() +
  theme(plot.title = element_text(hjust = .5),
        axis.ticks = element_blank()) +
  scale_fill_brewer(palette = "Dark2")

For Free, Demo classes Call: 8605110150
Registration Link: Click Here!

Violin Plot

  1. Composition

Waffle Chart

# Waffle chart: a 10 x 10 grid of tiles where each tile stands for roughly
# 1% of the vehicles in `mpg`, coloured by class.
v <- mpg$class

nr <- 10
dfrm <- expand.grid(y = 1:nr, x = 1:nr)

# Scale the class counts to the number of tiles in the grid. Named
# `class_counts` rather than `table` so base::table is not shadowed.
class_counts <- round(table(v) * ((nr * nr) / (length(v))))
class_counts

# One category label per tile. This assignment requires the rounded counts
# to sum to exactly nr * nr.
dfrm$category <- factor(rep(names(class_counts), class_counts))

ggplot(dfrm, aes(x = x, y = y, fill = category)) +
  geom_tile(color = "black", size = 0.5) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0), trans = "reverse") +
  scale_fill_brewer(palette = "Set3") +
  labs(title = "Waffle Chart",
       subtitle = "'Class' of vehicles",
       caption = "Source: mpg") +
  theme(panel.border = element_rect(size = 2),
        plot.title = element_text(size = rel(1.2)),
        axis.text = element_blank(),
        axis.title = element_blank(),
        axis.ticks = element_blank(),
        legend.title = element_blank(),
        legend.position = "right")

 

Pie Chart

# Pie charts of vehicle class: a stacked bar of width 1 drawn in polar
# coordinates.
library(ggplot2)
theme_set(theme_classic())

# Variant 1: start from a pre-computed frequency table.
data <- as.data.frame(table(mpg$class))
colnames(data) <- c("class", "freq")

piechart <- ggplot(data, aes(x = "", y = freq, fill = factor(class))) +
  geom_bar(width = 1, stat = "identity") +
  theme(axis.line = element_blank(),
        plot.title = element_text(hjust = 0.5)) +
  labs(fill = "class", x = NULL, y = NULL,
       title = "Pie Chart of class", caption = "Source: mpg")

piechart + coord_polar(theta = "y", start = 0)

# Variant 2: let geom_bar() count the rows of the raw data itself.
piechart <- ggplot(mpg, aes(x = "", fill = factor(class))) +
  geom_bar(width = 1) +
  theme(axis.line = element_blank(),
        plot.title = element_text(hjust = 0.5)) +
  labs(fill = "class", x = NULL, y = NULL,
       title = "Pie Chart of class", caption = "Source: mpg")

piechart + coord_polar(theta = "y", start = 0)

For Free, Demo classes Call: 8605110150
Registration Link: Click Here!

Violin Plot

Treemap

# Treemap of programming languages, grouped and coloured by parent category.
library(ggplot2)
library(treemapify)

langs <- read.csv("https://raw.githubusercontent.com/selva86/datasets/master/proglanguages.csv")

# NOTE(review): treemapify()/ggplotify() with these arguments look like the
# early treemapify API; newer releases appear to use geom_treemap() instead —
# confirm against the installed package version.
treeMap <- treemapify(langs, area = "value", fill = "parent",
                      label = "id", group = "parent")

treePlot <- ggplotify(treeMap) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_fill_brewer(palette = "Dark2")

print(treePlot)

 

Bar Chart

# Bar charts of manufacturer frequency in `mpg`.
freq <- table(mpg$manufacturer)
data <- as.data.frame.table(freq)  # columns are named Var1 and Freq
head(data)

library(ggplot2)
theme_set(theme_classic())

# Variant 1: pre-computed counts, hence stat = "identity".
graph <- ggplot(data, aes(Var1, Freq))
graph +
  geom_bar(stat = "identity", width = 0.5, fill = "tomato2") +
  labs(title = "Bar Chart",
       subtitle = "Manufacturer of vehicles",
       caption = "Source: Frequency of Manufacturers from 'mpg' dataset") +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))

# Variant 2: raw data, counted by geom_bar() and stacked by vehicle class.
graph <- ggplot(mpg, aes(manufacturer))
graph +
  geom_bar(aes(fill = class), width = 0.5) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
  labs(title = "Categorywise Bar Chart",
       subtitle = "Manufacturer of vehicles",
       caption = "Source: Manufacturers from 'mpg' dataset")

 

  1. Change

# Time-series plot of a `ts` object via ggfortify's autoplot() method.
library(ggplot2)
library(ggfortify)
theme_set(theme_classic())

autoplot(AirPassengers) +
  labs(title = "AirPassengers") +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title

 

library(ggplot2)

theme_set(theme_classic())

For Free, Demo classes Call: 8605110150
Registration Link: Click Here!

# Line chart over time with ggplot2's default date axis labels.
# NOTE(review): `returns_perc` does not appear to be a column of ggplot2's
# bundled `economics` data — confirm which dataset this example expects.
ggplot(economics, aes(x = date)) +
  geom_line(aes(y = returns_perc)) +
  labs(title = "Time Series Chart",
       subtitle = "Returns Percentage from 'Economics' Dataset",
       caption = "Source: Economics",
       y = "Returns %")

 

# Monthly time series: one labelled axis break per observation.
library(ggplot2)
library(lubridate)
theme_set(theme_bw())

economic <- economics[1:24, ]  # first 24 monthly rows

# Labels such as "Jul 1967", one per row; the breaks are the dates themselves.
lbels <- paste0(month.abb[lubridate::month(economic$date)], " ",
                lubridate::year(economic$date))
breks <- economic$date

# NOTE(review): `returns_perc` does not appear to be a column of ggplot2's
# bundled `economics` data — confirm which dataset this example expects.
ggplot(economic, aes(x = date)) +
  geom_line(aes(y = returns_perc)) +
  labs(title = "Monthly Time Series",
       subtitle = "Returns Percentage from Economics Dataset",
       caption = "Source: Economics",
       y = "Returns %") +
  scale_x_date(labels = lbels, breaks = breks) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
        panel.grid.minor = element_blank())

 

# Yearly time series: one axis break every 12th observation, labelled by year.
library(ggplot2)
library(lubridate)
theme_set(theme_bw())

economic <- economics[1:90, ]
breks <- economic$date[seq(1, length(economic$date), 12)]
lbels <- lubridate::year(breks)

# NOTE(review): `returns_perc` does not appear to be a column of ggplot2's
# bundled `economics` data — confirm which dataset this example expects.
ggplot(economic, aes(x = date)) +
  geom_line(aes(y = returns_perc)) +
  labs(title = "Yearly Time Series",
       subtitle = "Returns Percentage from Economics Dataset",
       caption = "Source: Economics",
       y = "Returns %") +
  scale_x_date(labels = lbels, breaks = breks) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
        panel.grid.minor = element_blank())

 

# Two series drawn from long-format data: one geom_line() call, with colour
# mapped to the `variable` column.
data(economics_long, package = "ggplot2")
head(economics_long)

library(ggplot2)
library(lubridate)
theme_set(theme_bw())

daf <- economics_long[economics_long$variable %in% c("psavert", "uempmed"), ]
# Subset `daf` itself; the original indexed an undefined `df` here.
daf <- daf[lubridate::year(daf$date) %in% c(1967:1981), ]

breks <- daf$date[seq(1, length(daf$date), 12)]
lbels <- lubridate::year(breks)

ggplot(daf, aes(x = date)) +
  geom_line(aes(y = value, col = variable)) +
  labs(title = "Time Series of Returns Percentage",
       subtitle = "Drawn from Long Data format",
       caption = "Source: Economics",
       y = "Returns %",
       color = NULL) +
  scale_x_date(labels = lbels, breaks = breks) +
  scale_color_manual(labels = c("psavert", "uempmed"),
                     values = c("psavert" = "#00ba38", "uempmed" = "#f8766d")) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 8),
        panel.grid.minor = element_blank())

 

# Two series drawn from wide-format data: one geom_line() per column, with a
# constant string as the colour aesthetic so each line gets a legend entry.
library(ggplot2)
library(lubridate)
theme_set(theme_bw())

daf <- economics[, c("date", "psavert", "uempmed")]
daf <- daf[lubridate::year(daf$date) %in% c(1967:1981), ]

breks <- daf$date[seq(1, length(daf$date), 12)]
lbels <- lubridate::year(breks)

ggplot(daf, aes(x = date)) +
  geom_line(aes(y = psavert, col = "psavert")) +
  geom_line(aes(y = uempmed, col = "uempmed")) +
  labs(title = "Time Series of Returns Percentage",
       subtitle = "Drawn From Wide Data format",
       caption = "Source: Economics",
       y = "Returns %") +
  scale_x_date(labels = lbels, breaks = breks) +
  scale_color_manual(name = "",
                     values = c("psavert" = "#00ba38", "uempmed" = "#f8766d")) +
  theme(panel.grid.minor = element_blank())

 

Stacked Area Chart

 

# Stacked area chart built by hand from wide data: the lower area is
# `uempmed`, the upper area is drawn at psavert + uempmed behind it.
library(ggplot2)
library(lubridate)
theme_set(theme_bw())

daf <- economics[, c("date", "psavert", "uempmed")]
# Subset `daf` itself; the original indexed an undefined `df` here.
daf <- daf[lubridate::year(daf$date) %in% c(1967:1981), ]

breks <- daf$date[seq(1, length(daf$date), 12)]
lbels <- lubridate::year(breks)

ggplot(daf, aes(x = date)) +
  geom_area(aes(y = psavert + uempmed, fill = "psavert")) +
  geom_area(aes(y = uempmed, fill = "uempmed")) +
  labs(title = "Area Chart of Returns Percentage",
       subtitle = "From Wide Data format",
       caption = "Source: Economics",
       y = "Returns %") +
  scale_x_date(labels = lbels, breaks = breks) +
  scale_fill_manual(name = "",
                    values = c("psavert" = "#00ba38", "uempmed" = "#f8766d")) +
  theme(panel.grid.minor = element_blank())

 

Calendar Heatmap

# Calendar heatmap: one tile per day, faceted by year (rows) and month
# (columns), coloured by the VIX closing value.
library(ggplot2)
library(plyr)
library(scales)
library(zoo)

daf <- read.csv("https://raw.githubusercontent.com/selva86/datasets/master/yahoo.csv")
daf$date <- as.Date(daf$date)
daf <- daf[daf$year >= 2012, ]

# Week-of-month: week number relative to the first week seen in each
# year-month group.
daf$yearmonth <- as.yearmon(daf$date)
daf$yearmonthf <- factor(daf$yearmonth)
daf <- ddply(daf, .(yearmonthf), transform, monthweek = 1 + week - min(week))
daf <- daf[, c("year", "yearmonthf", "monthf", "week", "monthweek",
               "weekdayf", "VIX.Close")]
head(daf)

ggplot(daf, aes(monthweek, weekdayf, fill = VIX.Close)) +
  geom_tile(colour = "white") +
  facet_grid(year ~ monthf) +
  scale_fill_gradient(low = "red", high = "green") +
  labs(x = "Week of Month", y = "",
       title = "Time-Series Calendar Heatmap",
       subtitle = "Yahoo Closing Price",
       fill = "Close")

 

Slope Chart

# Slope chart setup: load the cancer survival-rate data used below.
library(dplyr)
theme_set(theme_classic())
source_daf <- read.csv("https://raw.githubusercontent.com/jkeirstead/r-slopegraph/master/cancer_survival_rates.csv")

 

#' Prepare data for a Tufte-style slopegraph.
#'
#' Keeps the x, y and group columns of `daf`, fills missing x/group
#' combinations with 0, orders the groups by their value in the first x
#' column, and computes a cumulative vertical shift so that neighbouring
#' groups end up at least `min.space` (a fraction of the overall value range)
#' apart.
#'
#' Defined as `tufte_sort`, the name the script calls it by; the previous
#' definition was bound to `sort`, which left the call undefined and masked
#' base::sort.
#'
#' @param daf Data frame containing the columns named by `x`, `y`, `group`.
#' @param x,y,group Column names (strings) for the x position, the value and
#'   the line identifier.
#' @param method Not used by the body; kept so existing calls still work.
#' @param min.space Minimum gap between adjacent rows, as a fraction of the
#'   range of all values.
#' @return Long-format data frame with columns `group`, `yshift`, `x`, `y`
#'   and `ypos` (`y` plus the accumulated shift).
tufte_sort <- function(daf, x = "year", y = "value", group = "group",
                       method = "tufte", min.space = 0.05) {
  # Rename the selected columns for consistency.
  id <- match(c(x, y, group), names(daf))
  daf <- daf[, id]
  names(daf) <- c("x", "y", "group")

  # Make sure every x/group combination exists; absent ones get y = 0.
  grid <- expand.grid(x = unique(daf$x), group = unique(daf$group))
  daf <- merge(daf, grid, all.y = TRUE)
  daf$y[is.na(daf$y)] <- 0

  # One row per group, one column per x; order by the first x column.
  wide <- reshape2::dcast(daf, group ~ x, value.var = "y")
  wide <- wide[order(wide[, 2]), ]

  # Convert the fractional spacing into data units (the original read an
  # undefined `min.sp` here instead of the `min.space` argument).
  min_gap <- min.space * diff(range(wide[, -1]))
  shift <- numeric(nrow(wide))

  # Walk upward from the second row; seq_len()[-1] is empty for fewer than
  # two rows, unlike 2:nrow().
  for (i in seq_len(nrow(wide))[-1]) {
    matx <- as.matrix(wide[(i - 1):i, -1])
    dmin <- min(diff(matx))
    shift[i] <- if (dmin < min_gap) min_gap - dmin else 0
  }

  wide <- cbind(wide, yshift = cumsum(shift))

  scale <- 1
  long <- reshape2::melt(wide, id = c("group", "yshift"),
                         variable.name = "x", value.name = "y")
  # Plot position = value + scaled shift (the original added `y` to itself,
  # so the computed shift was never used).
  transform(long, ypos = y + scale * yshift)
}

 

#' Draw a slopegraph from data prepared by tufte_sort().
#'
#' Defined as `plot_slopegraph`, the name the script calls it by; the
#' previous definition was bound to `plot`, which left the call undefined and
#' masked base::plot.
#'
#' @param daf Data frame with columns `x`, `y`, `ypos` and `group`.
#' @return A ggplot object: grey lines per group, value labels at each point,
#'   and the group names as y-axis labels.
plot_slopegraph <- function(daf) {
  # Rows at the first x position supply the y-axis breaks and their labels.
  first_x <- subset(daf, x == head(x, 1))
  ylabs <- first_x$group
  yvals <- first_x$ypos
  font_size <- 3

  ggplot(daf, aes(x = x, y = ypos)) +
    geom_line(aes(group = group), colour = "grey80") +
    # White discs mask the lines behind the value labels.
    geom_point(colour = "white", size = 8) +
    geom_text(aes(label = y), size = font_size, family = "American Typewriter") +
    scale_y_continuous(name = "", breaks = yvals, labels = ylabs)
}

    

# Reshape the survival-rate data into slopegraph positions.
daf <- tufte_sort(source_daf, x = "year", y = "value", group = "group",
                  method = "tufte", min.space = 0.05)

For Free, Demo classes Call: 8605110150
Registration Link: Click Here!

# Turn x into a labelled factor and round the values shown as point labels.
daf <- transform(daf,
                 x = factor(x, levels = c(5, 10, 15, 20),
                            labels = c("5 years", "10 years", "15 years", "20 years")),
                 y = round(y))

plot_slopegraph(daf) +
  labs(title = "Estimates of % survival rates") +
  theme(axis.title = element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_text(hjust = 0.5, family = "American Typewriter",
                                  face = "bold"),
        axis.text = element_text(family = "American Typewriter", face = "bold"))

 

Seasonal Plot

# Seasonal plots: one line per year, with the months along the x-axis.
library(ggplot2)
library(forecast)
theme_set(theme_classic())

# Six-year window of the Nottingham temperature series. It is assigned to
# `nottem_small`, the name plotted below (the original assigned it to `small`).
nottem_small <- window(nottem, start = c(1920, 1), end = c(1925, 12))

ggseasonplot(AirPassengers) +
  labs(title = "Seasonal plot: International Airline Passengers")
ggseasonplot(nottem_small) +
  labs(title = "Seasonal plot: Air temperatures at Nottingham Castle")

 

  1. Groups

# Hierarchical clustering dendrogram of the USArrests data.
library(ggplot2)
library(ggdendro)
theme_set(theme_bw())

# Average-linkage clustering on Euclidean distances. The method name is
# written out in full instead of relying on partial matching of "ave".
hcd <- hclust(dist(USArrests), method = "average")
ggdendrogram(hcd, rotate = TRUE, size = 2)

 

Clusters

# Iris observations on their first two principal components, with each
# species encircled.
library(ggplot2)
library(ggalt)
library(ggfortify)
theme_set(theme_classic())

daf <- iris[c(1, 2, 3, 4)]  # the four numeric measurement columns
pca_mod <- prcomp(daf)

# PCA scores joined with the species label, then one subset per species for
# the encircling layers.
daf_pc <- data.frame(pca_mod$x, Species = iris$Species)
daf_pc_vir <- daf_pc[daf_pc$Species == "virginica", ]
daf_pc_set <- daf_pc[daf_pc$Species == "setosa", ]
daf_pc_ver <- daf_pc[daf_pc$Species == "versicolor", ]

ggplot(daf_pc, aes(PC1, PC2, col = Species)) +
  geom_point(aes(shape = Species), size = 2) +
  labs(title = "Iris Clustering",
       subtitle = "With principal components PC1 and PC2 as X and Y axis",
       caption = "Source: Iris") +
  # Widen the visible range by 20% so the outlines are not clipped.
  coord_cartesian(xlim = 1.2 * c(min(daf_pc$PC1), max(daf_pc$PC1)),
                  ylim = 1.2 * c(min(daf_pc$PC2), max(daf_pc$PC2))) +
  geom_encircle(data = daf_pc_vir, aes(x = PC1, y = PC2)) +
  geom_encircle(data = daf_pc_set, aes(x = PC1, y = PC2)) +
  geom_encircle(data = daf_pc_ver, aes(x = PC1, y = PC2))

 

  1. Spatial

 

# Maps of Chennai with a handful of localities marked and encircled.
# NOTE(review): geocode() and the Google-sourced qmap() calls presumably need
# an API key registered with ggmap, and the "osm" source may not be available
# in current ggmap releases — confirm against the installed version.
library(ggplot2)
library(ggmap)
library(ggalt)

ch <- geocode("Chennai")

# The same area as four base maps.
ch_sat_map <- qmap("chennai", zoom = 12, source = "google", maptype = "satellite")
ch_road_map <- qmap("chennai", zoom = 12, source = "google", maptype = "roadmap")
ch_hybrid_map <- qmap("chennai", zoom = 12, source = "google", maptype = "hybrid")
ch_osm_map <- qmap("chennai", zoom = 12, source = "osm")

ch_places <- c("Kolathur",
               "Washermanpet",
               "Royapettah",
               "Adyar",
               "Guindy")

# Geocode the vector defined above (the original passed the undefined name
# `chennai_places`).
pl_loc <- geocode(ch_places)

# OpenStreetMap base map.
ch_osm_map +
  geom_point(aes(x = lon, y = lat), data = pl_loc,
             alpha = 0.7, size = 7, color = "tomato") +
  geom_encircle(aes(x = lon, y = lat), data = pl_loc, size = 2, color = "blue")

# Google road map.
ch_road_map +
  geom_point(aes(x = lon, y = lat), data = pl_loc,
             alpha = 0.7, size = 7, color = "tomato") +
  geom_encircle(aes(x = lon, y = lat), data = pl_loc, size = 2, color = "blue")

# Google hybrid map.
ch_hybrid_map +
  geom_point(aes(x = lon, y = lat), data = pl_loc,
             alpha = 0.7, size = 7, color = "tomato") +
  geom_encircle(aes(x = lon, y = lat), data = pl_loc, size = 2, color = "blue")

 

Sample Plot practice:-

# 1) Stacked bar chart: cylinder counts (rows) within each gear count (columns).
Sample_Numbers <- table(mtcars$cyl, mtcars$gear)
barplot(Sample_Numbers,
        main = "Automobile cylinder number gears",
        col = c("red", "orange", "steelblue"),
        legend = rownames(Sample_Numbers),
        xlab = "Number of Gears",
        ylab = "count")

# 2) Histogram of the daily temperature readings in `airquality`.
hist(airquality$Temp, col = "steelblue",
     main = "Maximum Daily Temperature",
     xlab = "Temperature (degrees Fahrenheit)")

# 3) Heatmap of a small random matrix with shuffled rows.
Sample_x <- rnorm(10, mean = rep(1:5, each = 2), sd = 0.7)
Sample_y <- rnorm(10, mean = rep(c(1, 9), each = 5), sd = 0.1)
data <- data.frame(x = Sample_x, y = Sample_y)
# The seed is set after the rnorm() draws, so it only fixes the row shuffle.
set.seed(143)
data_Sample <- as.matrix(data)[sample(1:10), ]
heatmap(data_Sample)

# 4) Scatter plot of Wind against Ozone for September only.
# NOTE(review): the title says "Temperature" while the plot shows Ozone —
# confirm the intended wording.
with(subset(airquality, Month == 9), plot(Wind, Ozone, col = "steelblue"))
title("Wind and Temperature in NYC in September of 1973")

For Free, Demo classes Call: 8605110150
Registration Link: Click Here!

# 5) Box plot of mileage per cylinder count.
# `sample_cars` was never created in the original; it is built from mtcars
# here because the formula uses mtcars' mpg and cyl columns —
# NOTE(review): confirm mtcars is the intended source.
sample_cars <- transform(mtcars, cyl = factor(cyl))
class(sample_cars$cyl)
boxplot(mpg ~ cyl, sample_cars,
        xlab = "Number of Cylinders",
        ylab = "miles per gallon",
        main = "miles per gallon for varied cylinders in automobiles",
        cex.main = 1.2)

# 6) Correlation plots. cor() only accepts numeric input, so the factor
# column created above is dropped first.
library(corrplot)
corr_sample <- cor(sample_cars[vapply(sample_cars, is.numeric, logical(1))])
corrplot(corr_sample)
corrplot(corr_sample, method = "number", type = "lower")

# 7) Area chart of the mean wind speed for each day of the month.
library(dplyr)
library(ggplot2)
airquality %>%
  group_by(Day) %>%
  summarise(mean_wind = mean(Wind)) %>%
  ggplot() +
  geom_area(aes(x = Day, y = mean_wind)) +
  labs(title = "Area Chart of Average Wind per Day",
       subtitle = "using airquality data",
       y = "Mean Wind")

Author:-
Rahul Pund

Call the Trainer and Book your free demo Class now!!!

call icon

© Copyright 2019 | Sevenmentor Pvt Ltd.

Pin It on Pinterest

× How can I help you?