Estimating Longitudinal Propensity Scores & Matching in R

LPSM R-Code; Data; Simulation & LPSM R-Code
# getting started ####

## Do Not Change

library(pastecs)
library(foreign)
library(lmerTest)
library(lme4)
library(Hmisc)
library(psych)
library(lmtest)
library(lmerTest)
library(pbkrtest)
library(MuMIn)
library(sandwich)
library(sizeMat)
library(MatchIt)
library(lm.beta)
library(data.table)

options(scipen=100, digits=12)
options(max.print = 5000)

# Importing data 

setwd("") # Set the Working Directory for the CSV file
D<-read.csv("LPSM Data.csv", # Make sure you appropriately change the location
            header = T, # Tells r the rist row is the variable names
            sep = ",") # Tells r that the seperating character in the dataset is a ,


# Estimating the true slope coefficient for the association between X1-X5 and t ####
## All error terms used to create t1-t12 must be included in the equation to account for the existing error
## The true coefficients between X1-X5 and t will be different then the specifications above since we have dichotomized t and are estimating the association with a binary logistic regression model.


blr_True<-glm(t~X1_n+X2_n+X3_n+X4_n+X5_n+eRI_n+e2_n+e3_n+e4_n+e5_n+e6_n+e7_n+e8_n+e9_n+e10_n+e11_n+e12_n, data = D, family= binomial(link= logit))

summary(blr_True) # provides the results

# Estimating the biased slope coefficient for the association between X1-X5 and t with a BLR model without the error terms include ####

blr<-glm(t~X1_n+X2_n+X3_n+X4_n+X5_n, data = D,  family = binomial(link= logit), na.action = na.exclude)

summary(blr)

## Creating the predicted probabilities from the biased BLR model specified in line 210 ####

D$BLR_PP<-predict(blr, type = "response", na.rm = TRUE)
mean(D$BLR_PP)

# Creating a long dataset for the longitudinal analysis ####

names(D)
L<-reshape(D, varying =c("t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "t10", "t11", "t12"), v.names = "Treatment", timevar = "Wave", direction = "long")

# Estimating the biased slope coefficient for the association between X1-X5 and t with a RIS model without the error terms include ####

RIS<-glmer(Treatment~Wave+ X1_n+X2_n+X3_n+X4_n+X5_n + (1+Wave|id), data = L, family = binomial(link= logit))

summary(RIS)

## Creating the predicted probabilities from the biased RIS model specified in line 232 ####

L.RIS<- unique(L[,c("X1_n", "X2_n", "X3_n", "X4_n", "X5_n", "id","ID", "Wave")])

L.RIS$PP_RIS<- predict(RIS, newdata=L.RIS, type="response", allow.new.levels = TRUE,
                       re.form=NULL, na.action= na.exclude)


# Reshaping the model back into a wide dataframe ####
S3.DF<-reshape(L.RIS, idvar = "ID", timevar = "Wave", direction = "wide")


# Merging the dataframe with the existing dataframe ####

PP<-merge(D, S3.DF, c("ID"), all.x = TRUE)

# Creating the average predicted probability for the longitudinally estimated predicted probabilities ####

PP$Ave_PP_RIS<-rowMeans(PP[,c("PP_RIS.1","PP_RIS.2","PP_RIS.3","PP_RIS.4","PP_RIS.5","PP_RIS.6",
                              "PP_RIS.7","PP_RIS.8","PP_RIS.9","PP_RIS.10","PP_RIS.11","PP_RIS.12")], na.rm = TRUE)


# Propensity score matching using the predicted probabilities created from the BLR model ####
## Matches were done, using nearest neighbor matching without replacement, in random order and a caliper of .05

set.seed(1992)
BLR_M<-matchit(t~BLR_PP, data = PP, method = "nearest", replace = F, distance = PP$BLR_PP, m.order = "random", ratio = 1, caliper = .05)

print(summary(BLR_M, standardize = TRUE), digits = 5) # Printing results


# Propensity score matching using the predicted probabilities created from the RIS model ####
## Matches were done, using nearest neighbor matching without replacement, in random order and a caliper of .05


set.seed(1992)
RIS_M<-matchit(t~Ave_PP_RIS, data = PP, method = "nearest", replace = F, distance = PP$Ave_PP_RIS, m.order = "random", ratio = 1, caliper = .05)

print(summary(RIS_M, standardize = TRUE), digits = 5) # Printing results

# Creating matched datasets (i.e., only the matched participants are included in the dataset) ####

BLR_M.DF<-match.data(BLR_M) # BLR matched data

RIS_M.DF<-match.data(RIS_M) # RIS matched data

# Assessing post-matching balance for the BLR matched data ####

Bal.BLR.05<-bal.stat(BLR_M.DF, vars = c("X1_n","X2_n", "X3_n","X4_n","X5_n"),
                     treat.var = "t", w.all = 1, sampw = 1, estimand = "ATT", 
                     get.means = T, multinom = F)


print(Bal.BLR.05$results) # printing the results of the post-matching balance for the BLR matched data

# Assessing post-matching balance for the RIS matched data ####

Bal.RIS.05<-bal.stat(RIS_M.DF, vars = c("X1_n","X2_n", "X3_n","X4_n","X5_n"),
                     treat.var = "t", w.all = 1, sampw = 1, estimand = "ATT", 
                     get.means = T, multinom = F)


print(Bal.RIS.05$results) # printing the results of the post-matching balance for the RIS matched data


# Post matching estimation of the association between y and t (true = 1.00) #####

print("BLR_Post PSM") # Analysis pertaining to the BLR matches
summary(lm(y~t, data = BLR_M.DF))
lm.beta(lm(y~t, data = BLR_M.DF))
confint(lm(y~t, data = BLR_M.DF), level = .95)


print("RIS_Post PSM") # Analysis pertaining to the RIS matches
summary(lm(y~t, data = RIS_M.DF))
lm.beta(lm(y~t, data = RIS_M.DF))
confint(lm(y~t, data = RIS_M.DF), level = .95)

# End of Code
Share this: