Saturday, January 19, 2019

An example of R coding "Analyzing Google Trends with R" by David Krause, Marquette AIM Program director

This is an example of an R coding data-analytics assignment from FINA 4931: FinTech Topics, taught by David Krause, Marquette University AIM Program Director.




#Analyzing Google Trends with R by David Krause, Marquette AIM Program director

#load the following:
library(gtrendsR)
library(reshape2)
library(ggplot2)

# ---- Query parameters ----
# Search terms to compare against each other.
keywords <- c("Amazon", "Walmart", "Target")
# Geographic area of the query: "US" = United States.
country <- "US"
# Time window, written as "<start date> <end date>".
time <- "2014-01-01 2018-12-31"
# Google property (channel) to query.
channel <- "web"

# ---- Download ----
# Request the data from Google Trends using the parameters above.
trends <- gtrends(
  keyword = keywords,
  geo = country,
  time = time,
  gprop = channel
)
# Keep only the interest-over-time table (relative search interest, "hits").
time_trend <- trends[["interest_over_time"]]
# Show the first few rows as a sanity check.
head(time_trend)

#         date hits keyword geo gprop category
# 1 2014-01-05   50  Amazon  US   web        0
# 2 2014-01-12   48  Amazon  US   web        0
# 3 2014-01-19   48  Amazon  US   web        0
# 4 2014-01-26   45  Amazon  US   web        0
# 5 2014-02-02   43  Amazon  US   web        0
# 6 2014-02-09   45  Amazon  US   web        0


# Plot relative search interest over time, one coloured line per keyword.
# The ggplot() call was reconstructed from a copy that had been mangled into
# HTML-like attributes: the data is `time_trend`, with `date` on the x axis,
# `hits` on the y axis, and `keyword` used for both grouping and colour.
plot <- ggplot(
  data = time_trend,
  aes(x = date, y = hits, group = keyword, col = keyword)
) +
  geom_line() +
  xlab("Time") +
  ylab("Relative Interest") +
  theme_bw() +
  theme(
    legend.title = element_blank(),
    legend.position = "bottom",
    legend.text = element_text(size = 12)
  ) +
  ggtitle("Google Search Volume")
# Evaluating the object at top level draws the chart.
plot
# Outliers can distort the analysis. One simple way to remove them is to keep
# only the rows whose hits are below 80 (i.e. drop hits of 80 or more).
# NOTE(review): assumes `hits` is numeric, as the head() output suggests --
# confirm, since a character column would be compared as text.
# The trailing ", ]" keeps every column of the selected rows.
time_trend2 <- time_trend[time_trend$hits < 80, ]

# Same line chart as before, drawn from the filtered data.
plot <- ggplot(
  data = time_trend2,
  aes(x = date, y = hits, group = keyword, col = keyword)
) +
  geom_line() +
  xlab("Time") +
  ylab("Relative Interest") +
  theme_bw() +
  theme(
    legend.title = element_blank(),
    legend.position = "bottom",
    legend.text = element_text(size = 12)
  ) +
  ggtitle("Google Search Volume – Outliers Removed")
# Evaluating the object at top level draws the chart.
plot




# If there is seasonality, a smoothed trend line can assist with the analysis.
# geom_smooth() replaces the raw lines with a fitted curve per keyword;
# `span` sets how much smoothing is applied and `se = FALSE` hides the
# confidence band.
# Uses `time_trend2`, the outlier-filtered data created earlier in the script.
plot <- ggplot(
  data = time_trend2,
  aes(x = date, y = hits, group = keyword, col = keyword)
) +
  geom_smooth(span = 0.5, se = FALSE) +
  xlab("Time") +
  ylab("Relative Interest") +
  theme_bw() +
  theme(
    legend.title = element_blank(),
    legend.position = "bottom",
    legend.text = element_text(size = 12)
  ) +
  ggtitle("Google Search Volume")
# Evaluating the object at top level draws the chart.
plot





# Shortcut: pass the gtrends() result straight to plot() to chart the
# Google Trends search data in a single step.
plot(
  gtrendsR::gtrends(
    keyword = c("Walmart", "Amazon", "Target"),
    geo = "US",
    time = "2014-01-01 2018-12-31"
  )
)



# The same shortcut works for a narrower geography: the geo code "US-WI"
# restricts the query to Wisconsin.
plot(
  gtrendsR::gtrends(
    keyword = c("Walmart", "Amazon", "Target"),
    geo = "US-WI",
    time = "2014-01-01 2018-12-31"
  )
)