DD Example in R

Syntax

# Getting Started ####
# Written by: Ian A. Silver
# Written on: 3.22.22
# Research Idea: Latent adjustment for Probability of being exposed to Treatment
# R Version: 4.1.2 (2021-11-01)
# Platform: x86_64-pc-linux-gnu (64-bit)


options(scipen=100, digits=12)
options(max.print = 500000000)

library(doParallel)
library(pastecs)
library(lm.beta) 
library(parallel)
library(foreach)
library(data.table)
library(lme4)
library(MuMIn)
library(pbkrtest)
library(lmerTest)
library(doRNG)
library(tidyverse)
library(lavaan)
library(extrafont)
library(ggplot2)
registerDoParallel(6)


setwd("")

# Data Simulation ####
# Exposure = 0 
# Value Specifications
set.seed(238128)
N<-500
bA1<-runif(1,1,5)
bA2<-runif(1,1,5)
bA3<-runif(1,1,5)
bA4<-runif(1,1,5)
bA5<-runif(1,1,5)
bA6<-runif(1,1,5)
bA7<-runif(1,1,5)
bA8<-runif(1,1,5)
bA9<-runif(1,1,5)
bA10<-runif(1,1,5)
cA1<-runif(1,1,5)
cA2<-runif(1,1,5)
cA3<-runif(1,1,5)
cA4<-runif(1,1,5)
cA5<-runif(1,1,5)
cA6<-runif(1,1,5)
cA7<-runif(1,1,5)
cA8<-runif(1,1,5)
cA9<-runif(1,1,5)
cA10<-runif(1,1,5)
cB<--10
cE<-10
eb<-runif(1,10,20)*sample(c(-1,1),1)
ec<-runif(1,10,20)*sample(c(-1,1),1)

# Confounder Variables
A1<-rnorm(N,runif(1,0,5),runif(1,5,20))
A2<-rnorm(N,runif(1,0,5),runif(1,5,20))
A3<-rnorm(N,runif(1,0,5),runif(1,5,20))
A4<-rnorm(N,runif(1,0,5),runif(1,5,20))
A5<-rnorm(N,runif(1,0,5),runif(1,5,20))
A6<-rnorm(N,runif(1,0,5),runif(1,5,20))
A7<-rnorm(N,runif(1,0,5),runif(1,5,20))
A8<-rnorm(N,runif(1,0,5),runif(1,5,20))
A9<-rnorm(N,runif(1,0,5),runif(1,5,20))
A10<-rnorm(N,runif(1,0,5),runif(1,5,20))
# Error Terms 
eB<-rnorm(N,runif(1,0,5),runif(1,5,20))
eC<-rnorm(N,runif(1,0,5),runif(1,5,20))

# Treatment Variable (0,1)
B_C<-(bA1*A1)+(bA2*A2)+(bA3*A3)+(bA4*A4)+(bA5*A5)+  # Dependent Variable
  (bA6*A6)+(bA7*A7)+(bA8*A8)+(bA9*A9)+(bA10*A10)+(eb*eB)
summary(B_C)
B_n<-((B_C)-min(B_C-200))/((max(B_C+200))-min(B_C-200))
summary(B_n)
B<-rbinom(N,1,prob=B_n)
table(B)

E<-rep(1,500) # Exposure Time

C<-(cB*B)+(cE*E)+(cA1*A1)+(cA2*A2)+(cA3*A3)+(cA4*A4)+(cA5*A5)+
  (cA6*A6)+(cA7*A7)+(cA8*A8)+(cA9*A9)+(cA10*A10)+(ec*eC)

# Data Frame 1
DF1<-data.frame(A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,B,C,E)

# Exposure = 0
set.seed(458961)
N<-500
bA1<-runif(1,1,5)
bA2<-runif(1,1,5)
bA3<-runif(1,1,5)
bA4<-runif(1,1,5)
bA5<-runif(1,1,5)
bA6<-runif(1,1,5)
bA7<-runif(1,1,5)
bA8<-runif(1,1,5)
bA9<-runif(1,1,5)
bA10<-runif(1,1,5)
cA1<-runif(1,1,5)
cA2<-runif(1,1,5)
cA3<-runif(1,1,5)
cA4<-runif(1,1,5)
cA5<-runif(1,1,5)
cA6<-runif(1,1,5)
cA7<-runif(1,1,5)
cA8<-runif(1,1,5)
cA9<-runif(1,1,5)
cA10<-runif(1,1,5)
cB<-10
cE<-10
eb<-runif(1,10,20)*sample(c(-1,1),1)
ec<-runif(1,10,20)*sample(c(-1,1),1)

# Confounder Variables
A1<-rnorm(N,runif(1,0,5),runif(1,5,20)) 
A2<-rnorm(N,runif(1,0,5),runif(1,5,20))
A3<-rnorm(N,runif(1,0,5),runif(1,5,20))
A4<-rnorm(N,runif(1,0,5),runif(1,5,20))
A5<-rnorm(N,runif(1,0,5),runif(1,5,20))
A6<-rnorm(N,runif(1,0,5),runif(1,5,20))
A7<-rnorm(N,runif(1,0,5),runif(1,5,20))
A8<-rnorm(N,runif(1,0,5),runif(1,5,20))
A9<-rnorm(N,runif(1,0,5),runif(1,5,20))
A10<-rnorm(N,runif(1,0,5),runif(1,5,20))

# Error Terms
eB<-rnorm(N,runif(1,0,5),runif(1,5,20))
eC<-rnorm(N,runif(1,0,5),runif(1,5,20))

# Treatment Variable (0,1)
B_C<-(bA1*A1)+(bA2*A2)+(bA3*A3)+(bA4*A4)+(bA5*A5)+
  (bA6*A6)+(bA7*A7)+(bA8*A8)+(bA9*A9)+(bA10*A10)+(eb*eB)
summary(B_C)
B_n<-((B_C)-min(B_C-200))/((max(B_C+200))-min(B_C-200))
summary(B_n)
B<-rbinom(N,1,prob=B_n) # Treatment Variable 
table(B)

E<-rep(2,500) # Exposure Time

C<-(cB*B)+(cE*E)+(cA1*A1)+(cA2*A2)+(cA3*A3)+(cA4*A4)+(cA5*A5)+ #Dependent Variable
  (cA6*A6)+(cA7*A7)+(cA8*A8)+(cA9*A9)+(cA10*A10)+(ec*eC)

# Data Frame 2
DF2<-data.frame(A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,B,C,E)


# Data Frame

DF<-rbind(DF1,DF2)




# Propensity Score Estimation ####
PP<-predict(glm(B~A1+A2+A3, data = DF, family = binomial(link = "logit")), type="response")
DF$PP<-PP
summary(DF$PP)

# Propensity Score as ATE Weight ####
DF$ATEW[DF$B==1]<-1/DF$PP
DF$ATEW[DF$B==0]<-1/1-DF$PP
table(DF$B)
summary(DF$ATEW)

# Propensity Score as ATT Weight ####
DF$ATTW[DF$B==1]<-1
DF$ATTW[DF$B==0]<-DF$PP/(1-DF$PP)
summary(DF$ATTW)


# Model Syntax ####


M1<-lm(C~(B*E), data = DF) # Difference in Difference_No Weight B = treatment; E = Exposure
summary(M1) # Model Summary

M2<-lm(C~(B*E),data = DF, weights = ATEW) # Difference in Difference_ ATE Weight B = treatment; E = Exposure
summary(M2) # Model Summary

M3<-lm(C~(B*E),data = DF, weights = ATTW) # Difference in Difference_ ATT Weight B = treatment; E = Exposure
summary(M3) # Model Summary

Leave a comment