---
title: "Wellbeing and income re-analysis"
author: "John Wilcox"
date: "2024-12-25"
output: html_document
---

```{r setup, include=FALSE}
# Packages this document depends on. MKmisc is installed but not attached
# below; it is kept here so the set of installed packages is unchanged.
required_packages <- c(
  "rmarkdown", "readr", "tidyverse", "writexl", "readxl", "ggplot2",
  "Hmisc", "MKmisc", "corrplot", "ggcorrplot", "MKinfer"
)

# Install only what is missing. Reinstalling every package on every knit is
# slow, can fail for packages that are currently loaded (rmarkdown is in use
# while knitting), and errors in a non-interactive render when no CRAN mirror
# has been configured.
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]

if (length(missing_packages) > 0) {
  repos <- getOption("repos")
  # "@CRAN@" is R's placeholder for "no mirror chosen yet"; fall back to the
  # CRAN cloud mirror so the install works without an interactive prompt.
  if (is.null(repos) || is.na(repos["CRAN"]) || repos["CRAN"] == "@CRAN@") {
    repos["CRAN"] <- "https://cloud.r-project.org"
  }
  install.packages(missing_packages, repos = repos)
}

# knitr::opts_chunk$set(echo = TRUE)
```

```{r setup 2, include=TRUE}
# knitr::opts_chunk$set(echo = TRUE)
library(rmarkdown)
library(readr)
library(tidyverse)
library(writexl)
library(readxl)
library(ggplot2)
library(Hmisc)
library(corrplot)
library(ggcorrplot)
library(MKinfer)
```

```{r}
# Setting seed for reproducibility. The jittered plots and the random sample
# below all draw from this stream, so the order of the statements in this
# chunk matters for reproducing the exact figures.
set.seed(123)

# Loading data from OSF repository here: https://osf.io/qye4a/
# NOTE(review): this is an absolute path on the author's machine; point it at
# your own copy of the CSV downloaded from the OSF page above.
data_path <- "C:/Users/john-/Downloads/Income_and_emotional_wellbeing_a_conflict_resolved.csv"
df1 <- read_csv(data_path)

# Note: base of the log for the transformed data is approximately 2.718854203674
# Wellbeing = Mean response to “How do you feel right now?” on a continuous
# response scale with end points labeled “Very bad” and “Very good.”

# Returning range of incomes: range is $15k to $625k
range(df1$income)

# Produce wellbeing quantiles from data
quantile(df1$wellbeing, probs = c(.25, .5, .75, 1))
# The quantiles are
#     25%      50%      75%     100%
# 55.8050  63.1910  71.2945 100.0000

# Quartile cut-offs copied from the quantile output above. They are kept as
# literal values (rather than recomputed) so the group assignment matches the
# original analysis exactly.
wellbeing_q1 <- 55.8050
wellbeing_q2 <- 63.1910
wellbeing_q3 <- 71.2945

# Make a new wellbeing variable allocating individuals to four categories.
# case_when() takes the first matching condition, so each branch only needs
# its upper bound; rows with missing wellbeing match nothing and stay NA.
df1 <- df1 %>%
  mutate(
    wellbeing_quart = case_when(
      wellbeing < wellbeing_q1 ~ "Bottom",
      wellbeing < wellbeing_q2 ~ "2",
      wellbeing < wellbeing_q3 ~ "3",
      wellbeing >= wellbeing_q3 ~ "Top"
    )
  )

# Shared x axis: log-income breaks labelled with the approximate dollar
# amounts they correspond to. Returned from a function so every plot gets its
# own fresh scale object.
income_axis <- function() {
  scale_x_continuous(
    name = "Income",
    breaks = c(9.5, 10, 10.5, 11, 11.5, 12, 12.5, 13, 13.5),
    labels = c(
      "$13k", "$22k", "$36k", "$60k", "$99k",
      "$163k", "$269k", "$443k", "$731k"
    )
  )
}

# Shared y axis for the plots that spell out the response-scale end points.
feeling_axis <- function() {
  scale_y_continuous(
    name = "Average response: 'How do you feel right now?'",
    breaks = c(0, 25, 50, 75, 100),
    labels = c("0 ('Very bad')", "25", "50", "75", "100 ('Very good')")
  )
}

# Scatterplot of full data
ggplot(data = df1, aes(x = log_income, y = wellbeing)) +
  geom_jitter(width = 0.25, size = 2, alpha = 0.2) +
  geom_smooth(method = "lm") +
  income_axis() +
  labs(
    title = "The relationship between Wellbeing and Income",
    caption = "Wellbeing was measured as the mean response to 'How do you feel right now?' on a continuous response scale with end points labeled 'Very bad' and 'Very good'",
    y = "Wellbeing"
  ) +
  theme_classic()

# Scatterplot of a random sample of 4000 respondents
sampled_rows <- sample(nrow(df1), 4000)
ggplot(data = df1[sampled_rows, ], aes(x = log_income, y = wellbeing)) +
  geom_jitter(width = 0.25, size = 2, alpha = 0.2) +
  geom_smooth(method = "lm") +
  income_axis() +
  feeling_axis() +
  labs(title = "The relationship between Wellbeing and Income") +
  theme_minimal()

# Scatterplot for individuals with log income above 11.5, which the axis
# labels above put at about $99k.
# NOTE(review): the original comment described this as "past the $75k mark",
# but $75k corresponds to a log income of roughly 11.2 - confirm which cutoff
# is intended. The filter itself is unchanged.
high_income <- df1 %>%
  filter(log_income > 11.5)

ggplot(data = high_income, aes(x = log_income, y = wellbeing)) +
  geom_jitter(width = 0.25, size = 2, alpha = 0.2) +
  geom_smooth(method = "lm") +
  income_axis() +
  feeling_axis() +
  labs(title = "The relationship between Wellbeing and Income") +
  theme_minimal()

# Scatterplot for individuals in the top and bottom wellbeing quartiles, using
# the same log income > 11.5 (about $99k) filter as the previous plot.
high_income_extremes <- df1 %>%
  filter(log_income > 11.5, wellbeing_quart %in% c("Bottom", "Top"))

ggplot(
  data = high_income_extremes,
  aes(x = log_income, y = wellbeing, color = wellbeing_quart)
) +
  geom_jitter(width = 0.25, size = 2, alpha = 0.2) +
  # fill = NA hides the confidence ribbon so only the fitted lines show
  geom_smooth(method = "lm", fill = NA) +
  income_axis() +
  feeling_axis() +
  labs(title = "The relationship between Wellbeing and Income") +
  theme_minimal()
```