##
##  R Tricks and Tips Talk #1
##  Presented at University of Wisconsin-Milwaukee
##  Created by Nicholas Davis on 2014-10-24 in TextMate 1.5.11
##  R version 3.0.2 (2013-09-25) -- "Frisbee Sailing"
##
##########################################################

## Example on how to load SPSS data and trim the white space on the country name variable
## read.spss() lives in the foreign package, so attach it before calling the reader
library(foreign)
polity <- read.spss("http://democracyobserver.org/data/polity_pcf.sav", to.data.frame = TRUE)
## The which command returns the positions of the rows that satisfy a condition. Here, I want to eliminate rows from the polity data set where the year variable is less than 1980. (which() also skips rows where the comparison is NA, so no all-NA rows sneak in.)
polity <- polity[which(polity$year >= 1980), ]
## If you want to drop a column from the data (here, the first one), you can do the following:
polity <- polity[, -1]
## trim the white space from the right hand side of the country name variable
library(stringr)
polity$country <- str_trim(polity$country, side = "right")

## Example on how to get World Bank economic indicators directly in R
library(WDI)
## fetch the list of available indicators once, so the search and the downloads below can reuse it
wbd <- WDIcache()
## search for a certain concept/measure in the "name" field of the indicator list
WDIsearch("gdp per capita", "name", cache = wbd)
## pull out the indicators you want, making sure to list the countries and years you want
wb.gdp <- WDI(country = "all", indicator = "NY.GDP.MKTP.CD", start = 1980, end = 2012, extra = TRUE, cache = wbd)
wb.gdppc <- WDI(country = "all", indicator = "NY.GDP.PCAP.CD", start = 1980, end = 2012, extra = TRUE, cache = wbd)
## merge the country code, year and indicator columns of the two WDI data frames together, matching on year and country code - note that you can use merge to knit together any two data frames, as long as they have at least one common field.
## The columns are picked by name rather than by position, so the merge does not depend on the order in which WDI() happens to return them.
wb <- merge(
  wb.gdp[, c("iso3c", "year", "NY.GDP.MKTP.CD")],
  wb.gdppc[, c("iso3c", "year", "NY.GDP.PCAP.CD")],
  by = c("year", "iso3c")
)

## The countrycode package converts between country coding schemes. Here the World Bank ISO3 code is translated to the Correlates of War numeric code; regions, abbreviations and other identifiers can be produced the same way.
library(countrycode)
wb$ccode <- countrycode(sourcevar = wb$iso3c, origin = "iso3c", destination = "cown")
## With a shared year/ccode key in place, merge builds the combined polity + WDI data set
dat <- merge(x = polity, y = wb, by = c("year", "ccode"))

## Example of how to create dummy variables
load("mpsa14.diffusion.rda")
## The dummies package builds one indicator column per level of "region"; cbind then glues those columns onto the data frame. Unlike merge, which lines rows up through its "by" argument, cbind simply trusts that both pieces are already in the same row order.
library(dummies)
region_dummies <- dummy("region", dat)
dat <- cbind(dat, region_dummies)
rm(region_dummies)
## a column you do not need can be removed by assigning NULL to it
dat[["regionUnited States and Canada"]] <- NULL
## dummy makes stupid column names by default, so you need to fix them. This is a perfect application for "rename" - a function in the reshape package.
library(reshape)
dat <- rename(
  dat,
  c(
    regionAsia = "asia",
    "regionEast Asia and Pacific Islands" = "eastasia",
    regionEurope = "europe",
    "regionLatin America and Carribean" = "latamer",
    "regionMiddle East and North Africa" = "mena",
    "regionSub-Saharan Africa" = "africa"
  )
)
## I realized that Australia and New Zealand are coded as East Asia, but I want them not to be. I can find the relevant rows by country code and set the dummy to 0:
dat$eastasia[dat$ccode %in% c(900, 920)] <- 0

## Here I decided that NAs should actually be zero. Base R handles this directly: is.na() flags the missing entries and the assignment overwrites only those.
## (recode() from the car package - not reshape - can do the same job, but it needs library(car), which this script never loads.)
dat$usaid.gdp[is.na(dat$usaid.gdp)] <- 0

## Working with string data:
## paste glues values together; here it builds a country-year identifier such as "2:1980"
dat$cyear <- paste(dat$ccode, dat$year, sep = ":")

## substr extracts part of a string; here, the first three characters of the country name
dat$c_3 <- substr(dat$country, start = 1, stop = 3)

## toupper (or tolower) changes the case of a string variable
dat$country2 <- toupper(dat$country)

## search for a literal pattern in a string variable (should return all the unique countries with " States" in the name)
has_states <- grepl(" States", dat$country, fixed = TRUE)
unique(dat$country[has_states])
rm(has_states)

## gsub replaces every occurrence of a pattern within a string variable
dat$country2 <- gsub(pattern = "STATES", replacement = "PROVINCES", x = dat$country2)

## Example of how to search through Stata variable labels (Dave's code)
## read.dta() comes from the foreign package, so attach it before reading the file
library(foreign)
anes <- read.dta("http://democracyobserver.org/data/nes2008.dta")

## look through the variable labels for ANES questions mentioning "abortion"
library(DAMisc)
search.var.labels(anes, "abortion")
