# Read the survey .dta file with readstata13, leaving coded values unconverted
# (convert.factors, generate.factors and convert.dates are all switched off).
sis <- read.dta13("/Users/christineslaughter/Library/CloudStorage/Dropbox/Boston U (2023-2024)/SistersInPolitics/Sisters in Politics Survey Data (April 2022)/2022-04-18 - Qualtrics - Black Women Politics Clean Data.dta", convert.factors = F, generate.factors = F,encoding = "UTF-8", fromEncoding = NULL, convert.underscore = FALSE,missing.type = FALSE, convert.dates = F, replace.strl = TRUE,add.rownames = FALSE, nonint.factors = TRUE)
# NOTE(review): the next line repeats the read above verbatim.
sis <- read.dta13("/Users/christineslaughter/Library/CloudStorage/Dropbox/Boston U (2023-2024)/SistersInPolitics/Sisters in Politics Survey Data (April 2022)/2022-04-18 - Qualtrics - Black Women Politics Clean Data.dta", convert.factors = F, generate.factors = F,encoding = "UTF-8", fromEncoding = NULL, convert.underscore = FALSE,missing.type = FALSE, convert.dates = F, replace.strl = TRUE,add.rownames = FALSE, nonint.factors = TRUE)
# Set all column names to lowercase.
# NOTE(review): the new names are taken from columns 1:287 only, so this
# presumes the data has exactly 287 columns -- TODO confirm.
sis <- setNames(sis, tolower(names(sis[1:287])))
# Change the working directory to the project folder.
# NOTE(review): the warning printed right after this call reports that the
# change is reset once the notebook chunk finishes.
setwd("~/Dropbox/Boston U (2023-2024)/SistersInPolitics")
Warning: The working directory was changed to /Users/christineslaughter/Library/CloudStorage/Dropbox/Boston U (2023-2024)/SistersInPolitics inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
table(sis$q33)

   1    2    3    4    5 
1241   74  255  594  120 
# Recode the numeric q33 answers (codes 1-5) into text labels in sis$news.
# Rows whose q33 matches none of the five codes receive no label here.
sis$news[sis$q33 == 1] <- "Television News"
sis$news[sis$q33 == 2] <- "Newspapers" 
sis$news[sis$q33 == 3] <- "Television Talk Shows"
sis$news[sis$q33 == 4] <- "Internet"
sis$news[sis$q33 == 5] <- "Radio"


table(sis$news)

             Internet            Newspapers                 Radio       Television News 
                  594                    74                   120                  1241 
Television Talk Shows 
                  255 
library(ggthemes)
library(hrbrthemes)
# Tally respondents per news label; `per` is each tally divided by the total
# of all tallies, rounded to two decimals.
sis_news <- sis %>%
  dplyr::select(news) %>%
  count(news) %>%
mutate(per = round(n/sum(n), digits=2))
  

# Plot: one bar per news source, ordered by `per`, with the percentage printed
# in white inside each bar.
sis_news_plot <- ggplot(data= sis_news, aes(x=reorder(news,per),y=per)) +
  geom_bar(stat= "identity") +
  # NOTE(review): this theme() call precedes theme_ipsum(); a later complete
  # theme may override it -- confirm.
  labs(x = "", y = "%", title = "Black Women's News Source By Media Type", caption = "Black Women in Politics Survey (N= 2,284)\n April 2022")  + theme(legend.position = "bottom") + 
   scale_y_continuous(labels = scales::percent) + 
 theme_ipsum() + 
   geom_text(aes(x = news, y =per, label =sprintf("%0.0f%%", per*100), vjust =0),position=position_stack(vjust=0.5), colour= "White")

# Display the plot.
sis_news_plot

# Write the plot to a 7 x 5 JPEG; the file name is given by name, so the plot
# object is matched to the remaining leading argument of ggsave().
ggsave(sis_news_plot, width =7, height = 5, filename = "sis_news_plot.jpeg")   

NA
NA

table(sis$q81)

  1   2   3   4   5   6   7 
 14  83 561 750 318 370 188 
table(sis$education)
< table of extent 0 >
# Drop any existing education column so the recode below starts from scratch.
sis$education <- NULL

# Collapse the seven q81 codes into five education labels
# (codes 1-2 share a label, as do codes 4-5).
sis$education[sis$q81 == 1] <- "Less Than High School"
sis$education[sis$q81 == 2] <- "Less Than High School"
sis$education[sis$q81 == 3] <- "High School Diploma"
sis$education[sis$q81 == 4] <- "Some College/Associates"
sis$education[sis$q81 == 5] <- "Some College/Associates"
sis$education[sis$q81 == 6] <- "College Degree"
sis$education[sis$q81 == 7] <- "Professional Degree"


# Convert to a factor, then impose the low-to-high level order.
sis$education <- as.factor(sis$education)

sis$education <- ordered(sis$education, levels = c("Less Than High School", "High School Diploma", "Some College/Associates", "College Degree", "Professional Degree"))

# Count news sources within each education level; `per` is the share of that
# education level's respondents naming each source, rounded to two decimals.
# q81 is selected but not used afterwards.
top_edu_news <- sis %>%
  dplyr::select(news, education, q81)%>%
  group_by(education) %>% count(news) %>%
  mutate(per = round(n/sum(n), digits=2)) 

top_edu_news
# position=position_dodge() 

# Dodged bars: news source on the x axis, one bar per education level, with
# small white percentage labels dodged by the same width as the bars.
top_edu_news_plot <- top_edu_news %>%
  ggplot(aes(fill = education, x=reorder(news,per),per)) + 
  geom_bar(stat='identity',position=position_dodge(width =0.89)) +
  scale_y_continuous(labels = scales::percent) +
  theme(legend.position="bottom") +  
 # coord_flip() + 
 theme_ipsum() + 
   # legend.position is set again here, after theme_ipsum().
   theme(legend.position = "bottom") + guides(fill = guide_legend(nrow = 2)) + 
  labs(x= "  ", y = "   ", title = "Black Womens News Consumption by Education Levels", caption = "Black Women in Politics Survey (N= 2,284)\n April 2022", fill = "")  + geom_text(aes(x = news, y = per, label =sprintf("%0.0f%%", per*100)), size = 2.5,  position=position_dodge(width =0.89), colour = "White", vjust = 2)

# Display the plot.
top_edu_news_plot

# Write the plot to a 9 x 7 JPEG.
ggsave(top_edu_news_plot, width =9, height = 7, filename = "top_edu_news_plot.jpeg")   

table(sis$q26)

   1    2 
 613 1671 
# Build sis$post_sm: rows with q26 == 2 are labelled "Do Not Post"; rows with
# q27 codes 1-3 get a posting-frequency label (a q27 match overwrites the
# q26-based label because it is assigned afterwards).
sis$post_sm[sis$q26 == 2] <- "Do Not Post"
sis$post_sm[sis$q27 == 1] <- "All the time"
sis$post_sm[sis$q27 == 2] <- "Often"
sis$post_sm[sis$q27 == 3] <- "Sometimes"


# Convert to a factor, then impose the most-to-least frequent level order.
sis$post_sm <- as.factor(sis$post_sm)

sis$post_sm <- ordered(sis$post_sm, levels = c("All the time", "Often", "Sometimes", "Do Not Post"))

table(sis$post_sm)

All the time        Often    Sometimes  Do Not Post 
          86          180          347         1671 
# Tally respondents per posting-frequency label; `per` is each tally divided
# by the total of all tallies, rounded to two decimals.
sis_sm <- sis %>%
  dplyr::select(post_sm) %>%
  count(post_sm) %>%
mutate(per = round(n/sum(n), digits=2))
  

# Plot: one bar per posting-frequency label, ordered by `per`, with the
# percentage printed in white inside each bar.
sis_sm_plot <- ggplot(data= sis_sm, aes(x=reorder(post_sm,per),y=per)) +
  geom_bar(stat= "identity") +
  # NOTE(review): this theme() call precedes theme_ipsum(); a later complete
  # theme may override it -- confirm.
  labs(x = "", y = "%", title = "Black Women's Social Media Post Frequency", caption = "Black Women in Politics Survey (N= 2,284)\n April 2022")  + theme(legend.position = "bottom") + 
   scale_y_continuous(labels = scales::percent) + 
 theme_ipsum() + 
   geom_text(aes(x = post_sm, y =per, label =sprintf("%0.0f%%", per*100), vjust =0),position=position_stack(vjust=0.5), colour= "White")

# Display the plot.
sis_sm_plot

# Write the plot to a 7 x 5 JPEG.
ggsave(sis_sm_plot, width =7, height = 5, filename = "sis_sm_plot.jpeg")   



table(sis$q82)

  1   2   3   4   5   6   7   8   9  10  11  12 
310 255 335 251 198 242 157 186  62  79 145  64 
# Drop any existing income column so the recode below starts from scratch.
sis$income <- NULL

# Collapse the q82 codes into six income brackets:
# 1 | 2-3 | 4-5 | 6-7 | 8-10 | 11 and above.
sis$income[sis$q82 == 1] <- "Less than $10k"
sis$income[sis$q82 == 2] <- "$10k - $30k"
sis$income[sis$q82 == 3] <- "$10k - $30k"
sis$income[sis$q82 == 4 | sis$q82 == 5] <- "$30k - $50k"
sis$income[sis$q82 == 6 | sis$q82 == 7] <- "$50k - $70k"
sis$income[sis$q82 == 8 | sis$q82 == 9 | sis$q82 == 10] <- "$70k - $100k"
sis$income[sis$q82 >= 11] <- "More than $100k"

table(sis$income)

    $10k - $30k     $30k - $50k     $50k - $70k    $70k - $100k  Less than $10k More than $100k 
            590             449             399             327             310             209 
# Convert to a factor, then impose the low-to-high bracket order.
sis$income <- as.factor(sis$income)

sis$income <- ordered(sis$income, levels = c("Less than $10k",  "$10k - $30k", "$30k - $50k", "$50k - $70k", "$70k - $100k", "More than $100k"))

# Count posting-frequency labels within each income bracket; `per` is the
# share of that bracket's respondents in each posting category.
# q81 is selected but not used afterwards. print(n=24) shows up to 24 rows and
# its result is what gets assigned to top_inc_post.
top_inc_post <- sis %>%
  dplyr::select(post_sm, income, q81)%>%
  group_by(income) %>% count(post_sm) %>%
  mutate(per = round(n/sum(n), digits=2)) %>% 
  print(n=24)

top_inc_post
# position=position_dodge() 

# Dodged bars: posting frequency on the x axis, one bar per income bracket,
# with small white percentage labels dodged by the same width as the bars.
top_inc_post_plot <- top_inc_post %>%
  ggplot(aes(fill = income, x=reorder(post_sm,per),per)) + 
  geom_bar(stat='identity',position=position_dodge(width =0.89)) +
  scale_y_continuous(labels = scales::percent) +
  theme(legend.position="bottom") +  
 # coord_flip() + 
 theme_ipsum() + 
   # legend.position is set again here, after theme_ipsum().
   theme(legend.position = "bottom") + guides(fill = guide_legend(nrow = 2)) + 
  labs(x= "  ", y = "   ", title = "Black Women's Social Media Engagement by Income Levels", caption = "Black Women in Politics Survey (N= 2,284)\n April 2022", fill = "")  + geom_text(aes(x = post_sm, y = per, label =sprintf("%0.0f%%", per*100)), size = 2.5,  position=position_dodge(width =0.89), colour = "White", vjust = 2)

# Display the plot.
top_inc_post_plot

# Write the plot to a 9 x 7 JPEG.
ggsave(top_inc_post_plot, width =9, height = 7, filename = "top_inc_post_plot.jpeg")   

NA
NA
NA
NA

---
title: "R Notebook"
output: html_notebook
---



```{r warning=TRUE, include=FALSE}
library(tidyverse)
library(haven)
library(ggplot2)
library(labelled)
library(stargazer) 
library(skimr)
library(readstata13)

# Resolve conflicts

conflicted::conflict_prefer("select", "dplyr")
conflicted::conflict_prefer("filter", "dplyr")
conflicted::conflict_prefer("count", "dplyr")
conflicted::conflict_prefer("mutate", "dplyr")

# Replace every whitespace character in `x` with a newline.
#   x   : character vector (anything gsub() can coerce to character)
#   ... : accepted for call compatibility and ignored
# Returns a character vector the same length as `x`.
addline_format <- function(x, ...) {
  gsub(pattern = "\\s", replacement = "\n", x = x)
}

# Linearly rescale `x` so that `min` maps to 0 and `max` maps to 1.
#   x   : numeric vector
#   min : value mapped to 0; defaults to the smallest non-missing value of `x`
#   max : value mapped to 1; defaults to the largest non-missing value of `x`
# Returns a numeric vector the same length as `x`; NAs in `x` stay NA.
# Values of `x` outside [min, max] land outside [0, 1], and min == max divides
# by zero (NaN/Inf), exactly as the plain formula does.
# The defaults call base::min/base::max explicitly because the argument names
# shadow those functions inside this body.
rescale_01 <- function(x,
                       min = base::min(x, na.rm = TRUE),
                       max = base::max(x, na.rm = TRUE)) {
  (x - min) / (max - min)
}

# Summary statistics for a numeric vector: the mean plus/minus one standard
# deviation.
#   x     : numeric vector
#   na.rm : drop missing values before computing? Defaults to FALSE, so any NA
#           in `x` makes all three results NA.
# Returns a named numeric vector c(y = mean, ymin = mean - sd, ymax = mean + sd).
data_summary <- function(x, na.rm = FALSE) {
  centre <- mean(x, na.rm = na.rm)
  spread <- sd(x, na.rm = na.rm) # computed once and reused for both bounds
  c(y = centre, ymin = centre - spread, ymax = centre + spread)
}


``` 


```{r using black women in politics data}

# Read the survey .dta file with readstata13. Factor conversion and date
# conversion are switched off, so coded answers stay as stored values and are
# recoded by hand in later chunks.
sis <- read.dta13(
  "/Users/christineslaughter/Library/CloudStorage/Dropbox/Boston U (2023-2024)/SistersInPolitics/Sisters in Politics Survey Data (April 2022)/2022-04-18 - Qualtrics - Black Women Politics Clean Data.dta",
  convert.factors = FALSE,
  generate.factors = FALSE,
  encoding = "UTF-8",
  fromEncoding = NULL,
  convert.underscore = FALSE,
  missing.type = FALSE,
  convert.dates = FALSE,
  replace.strl = TRUE,
  add.rownames = FALSE,
  nonint.factors = TRUE
)

# Set all column names to lowercase. Uses every column the file actually has
# rather than a hard-coded 1:287 range, which errors on a narrower file and
# leaves NA names on a wider one.
names(sis) <- tolower(names(sis))

# Work from the project folder.
# NOTE(review): inside an R Notebook chunk this change is reset when the chunk
# finishes, so relative paths in later chunks may resolve elsewhere; knitr's
# root.dir option is the notebook-wide alternative.
setwd("~/Dropbox/Boston U (2023-2024)/SistersInPolitics")

```



```{r recode vars }
# Raw distribution of the q33 codes.
table(sis$q33)

# Text label for each q33 code; the position in this vector is the code.
news_labels <- c(
  "Television News",
  "Newspapers",
  "Television Talk Shows",
  "Internet",
  "Radio"
)

# Write the label into sis$news for every row carrying that code. Rows whose
# q33 matches none of the codes are not assigned.
for (code in seq_along(news_labels)) {
  sis$news[sis$q33 == code] <- news_labels[code]
}

# Distribution of the recoded labels.
table(sis$news)
```


```{r where get news}
library(ggthemes)
library(hrbrthemes)
# Share of respondents naming each news source. count() tallies rows per
# label; `per` is each tally over the total of all tallies, rounded to two
# decimals.
sis_news <- sis %>%
  dplyr::select(news) %>%
  count(news) %>%
  mutate(per = round(n / sum(n), digits = 2))

# Bar chart ordered by share, with the percentage printed in white at the
# middle of each bar.
sis_news_plot <- ggplot(data = sis_news, aes(x = reorder(news, per), y = per)) +
  geom_bar(stat = "identity") +
  labs(
    x = "",
    y = "%",
    title = "Black Women's News Source By Media Type",
    caption = "Black Women in Politics Survey (N= 2,284)\n April 2022"
  ) +
  scale_y_continuous(labels = scales::percent) +
  theme_ipsum() +
  # theme() tweaks must follow theme_ipsum(): a complete theme replaces
  # whatever theme settings were added before it.
  theme(legend.position = "bottom") +
  geom_text(
    aes(x = news, y = per, label = sprintf("%0.0f%%", per * 100)),
    vjust = 0, # constant justification, so set outside aes()
    position = position_stack(vjust = 0.5),
    colour = "White"
  )

sis_news_plot

# ggsave()'s first formal is `filename`; name both arguments so the plot
# object cannot be mistaken for the path.
ggsave(
  filename = "sis_news_plot.jpeg",
  plot = sis_news_plot,
  width = 7,
  height = 5
)


``` 



```{r top news by education}

# Raw distribution of the q81 codes.
table(sis$q81)

# Education label for each q81 code; the position in this vector is the code.
# Codes 1-2 and codes 4-5 are collapsed into a single label each.
education_by_code <- c(
  "Less Than High School",
  "Less Than High School",
  "High School Diploma",
  "Some College/Associates",
  "Some College/Associates",
  "College Degree",
  "Professional Degree"
)

# Build the ordered factor in one step. Codes outside 1-7 (and NA) become NA.
# Overwriting the whole column makes the chunk safe to re-run.
sis$education <- factor(
  education_by_code[match(sis$q81, seq_along(education_by_code))],
  levels = c(
    "Less Than High School",
    "High School Diploma",
    "Some College/Associates",
    "College Degree",
    "Professional Degree"
  ),
  ordered = TRUE
)

# Check the recode against the raw codes (run after the column exists).
table(sis$q81, sis$education, useNA = "ifany")

# News source within each education level: `n` counts respondents and `per`
# is the share of that education level naming each source.
top_edu_news <- sis %>%
  dplyr::select(news, education) %>%
  group_by(education) %>%
  count(news) %>%
  mutate(per = round(n / sum(n), digits = 2)) %>%
  ungroup()

top_edu_news

# Dodged bars: news source on the x axis, one bar per education level, with
# small white percentage labels dodged by the same width as the bars.
top_edu_news_plot <- top_edu_news %>%
  ggplot(aes(fill = education, x = reorder(news, per), y = per)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.89)) +
  scale_y_continuous(labels = scales::percent) +
  # coord_flip() +
  theme_ipsum() +
  # Legend placement has to follow theme_ipsum(), which would otherwise
  # replace it.
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 2)) +
  labs(
    x = "  ",
    y = "   ",
    title = "Black Women's News Consumption by Education Levels",
    caption = "Black Women in Politics Survey (N= 2,284)\n April 2022",
    fill = ""
  ) +
  geom_text(
    aes(x = news, y = per, label = sprintf("%0.0f%%", per * 100)),
    size = 2.5,
    position = position_dodge(width = 0.89),
    colour = "White",
    vjust = 2
  )

top_edu_news_plot

ggsave(
  filename = "top_edu_news_plot.jpeg",
  plot = top_edu_news_plot,
  width = 9,
  height = 7
)

```


```{r income x posting on social media}
# Raw distribution of the q26 codes.
table(sis$q26)

# Rows with q26 == 2 are labelled as not posting.
sis$post_sm[sis$q26 == 2] <- "Do Not Post"

# Posting-frequency label for each q27 code; the position in this vector is
# the code. A q27 match is written after (and so over) the q26-based label.
post_freq_labels <- c("All the time", "Often", "Sometimes")
for (code in seq_along(post_freq_labels)) {
  sis$post_sm[sis$q27 == code] <- post_freq_labels[code]
}

# Ordered factor running from most frequent posting to not posting.
sis$post_sm <- factor(
  sis$post_sm,
  levels = c(post_freq_labels, "Do Not Post"),
  ordered = TRUE
)

table(sis$post_sm)


# Share of respondents in each posting-frequency category. count() tallies
# rows per label; `per` is each tally over the total of all tallies, rounded
# to two decimals.
sis_sm <- sis %>%
  dplyr::select(post_sm) %>%
  count(post_sm) %>%
  mutate(per = round(n / sum(n), digits = 2))

# Bar chart ordered by share, with the percentage printed in white at the
# middle of each bar.
sis_sm_plot <- ggplot(data = sis_sm, aes(x = reorder(post_sm, per), y = per)) +
  geom_bar(stat = "identity") +
  labs(
    x = "",
    y = "%",
    title = "Black Women's Social Media Post Frequency",
    caption = "Black Women in Politics Survey (N= 2,284)\n April 2022"
  ) +
  scale_y_continuous(labels = scales::percent) +
  theme_ipsum() +
  # theme() tweaks must follow theme_ipsum(): a complete theme replaces
  # whatever theme settings were added before it.
  theme(legend.position = "bottom") +
  geom_text(
    aes(x = post_sm, y = per, label = sprintf("%0.0f%%", per * 100)),
    vjust = 0, # constant justification, so set outside aes()
    position = position_stack(vjust = 0.5),
    colour = "White"
  )

sis_sm_plot

# ggsave()'s first formal is `filename`; name both arguments so the plot
# object cannot be mistaken for the path.
ggsave(
  filename = "sis_sm_plot.jpeg",
  plot = sis_sm_plot,
  width = 7,
  height = 5
)


# Raw distribution of the q82 codes.
table(sis$q82)

# Start from a clean column so re-running the chunk rebuilds it from scratch.
sis$income <- NULL

# q82 codes that make up each bounded income bracket (label -> codes).
income_bands <- list(
  "Less than $10k" = 1,
  "$10k - $30k" = c(2, 3),
  "$30k - $50k" = c(4, 5),
  "$50k - $70k" = c(6, 7),
  "$70k - $100k" = c(8, 9, 10)
)
for (band in names(income_bands)) {
  sis$income[sis$q82 %in% income_bands[[band]]] <- band
}

# The top bracket is open-ended: every code from 11 upwards.
sis$income[sis$q82 >= 11] <- "More than $100k"

table(sis$income)

# Ordered factor running from the lowest to the highest bracket.
sis$income <- factor(
  sis$income,
  levels = c(names(income_bands), "More than $100k"),
  ordered = TRUE
)

# Posting frequency within each income bracket: `n` counts respondents and
# `per` is the share of that bracket in each posting category. Only the two
# columns actually used are selected.
top_inc_post <- sis %>%
  dplyr::select(post_sm, income) %>%
  group_by(income) %>%
  count(post_sm) %>%
  mutate(per = round(n / sum(n), digits = 2)) %>%
  ungroup()

# Show up to 24 rows (6 income levels x 4 posting levels), printed once.
print(top_inc_post, n = 24)

# Dodged bars: posting frequency on the x axis, one bar per income bracket,
# with small white percentage labels dodged by the same width as the bars.
top_inc_post_plot <- top_inc_post %>%
  ggplot(aes(fill = income, x = reorder(post_sm, per), y = per)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.89)) +
  scale_y_continuous(labels = scales::percent) +
  # coord_flip() +
  theme_ipsum() +
  # Legend placement has to follow theme_ipsum(), which would otherwise
  # replace it.
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 2)) +
  labs(
    x = "  ",
    y = "   ",
    title = "Black Women's Social Media Engagement by Income Levels",
    caption = "Black Women in Politics Survey (N= 2,284)\n April 2022",
    fill = ""
  ) +
  geom_text(
    aes(x = post_sm, y = per, label = sprintf("%0.0f%%", per * 100)),
    size = 2.5,
    position = position_dodge(width = 0.89),
    colour = "White",
    vjust = 2
  )

top_inc_post_plot

ggsave(
  filename = "top_inc_post_plot.jpeg",
  plot = top_inc_post_plot,
  width = 9,
  height = 7
)




```

```{r what platforms are black women posting}
# Of the 613 Black women who post political messages on social media:
# which platforms do they post on?
class(sis$q28_1)

# One 0/1 indicator per platform item: 1 when the q28_* answer is non-missing.
# NOTE(review): this presumes an unselected platform is stored as NA (not 0 or
# an empty string) -- confirm against the class() output above.
sis$post_fb <- ifelse(!is.na(sis$q28_1), 1, 0)
sis$post_twitter <- ifelse(!is.na(sis$q28_2), 1, 0)
sis$post_ig <- ifelse(!is.na(sis$q28_3), 1, 0)
sis$post_tiktok <- ifelse(!is.na(sis$q28_4), 1, 0)
sis$post_snap <- ifelse(!is.na(sis$q28_5), 1, 0)
sis$post_medium <- ifelse(!is.na(sis$q28_6), 1, 0)
sis$post_other <- ifelse(!is.na(sis$q28_7), 1, 0)

# Check the new indicators (run after they exist).
table(sis$post_fb)
table(sis$post_other)

# Share of posters using each platform. starts_with("post_") also keeps
# post_sm, which the filter needs in order to drop non-posters. The reshape
# selects every summary column by name instead of by position.
post_platform <- sis %>%
  dplyr::select(starts_with("post_")) %>%
  filter(post_sm != "Do Not Post") %>%
  dplyr::summarise(
    post_fb = mean(post_fb == 1, na.rm = TRUE),
    post_twitter = mean(post_twitter == 1, na.rm = TRUE),
    post_ig = mean(post_ig == 1, na.rm = TRUE),
    post_tiktok = mean(post_tiktok == 1, na.rm = TRUE),
    post_snap = mean(post_snap == 1, na.rm = TRUE),
    post_medium = mean(post_medium == 1, na.rm = TRUE),
    post_other = mean(post_other == 1, na.rm = TRUE)
  ) %>%
  tidyr::pivot_longer(
    cols = tidyr::everything(),
    names_to = "q",
    values_to = "r"
  ) %>%
  mutate(r = round(r, digits = 2))

post_platform

# NOTE(review): the caption reports the full-sample N, but these shares are
# computed among posters only -- confirm the intended caption.
post_platform_plot <- post_platform %>%
  ggplot(aes(x = reorder(q, r), y = r)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.89)) +
  scale_y_continuous(labels = scales::percent) +
  theme_ipsum() +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 2)) +
  labs(
    x = "  ",
    y = "   ",
    title = "Black Women's Social Media Engagement by Platform",
    caption = "Black Women in Politics Survey (N= 2,284)\n April 2022",
    fill = ""
  ) +
  geom_text(
    aes(x = q, y = r, label = sprintf("%0.0f%%", r * 100)),
    size = 2.5,
    position = position_dodge(width = 0.89),
    colour = "White",
    vjust = 2
  )

post_platform_plot

# ggsave()'s first formal is `filename`; name both arguments explicitly.
ggsave(
  filename = "post_platform_plot.jpeg",
  plot = post_platform_plot,
  width = 9,
  height = 7
)




```



```{r post platform by income}

# Share of posters using each platform, within each income bracket.
# After the grouped summarise the first column is `income`, so a positional
# 1:7 reshape would gather `income` and leave `post_other` behind. The
# platform columns are therefore selected by name.
inc_post_platform <- sis %>%
  dplyr::select(starts_with("post_"), income) %>%
  filter(post_sm != "Do Not Post") %>%
  group_by(income) %>%
  dplyr::summarise(
    post_fb = mean(post_fb == 1, na.rm = TRUE),
    post_twitter = mean(post_twitter == 1, na.rm = TRUE),
    post_ig = mean(post_ig == 1, na.rm = TRUE),
    post_tiktok = mean(post_tiktok == 1, na.rm = TRUE),
    post_snap = mean(post_snap == 1, na.rm = TRUE),
    post_medium = mean(post_medium == 1, na.rm = TRUE),
    post_other = mean(post_other == 1, na.rm = TRUE)
  ) %>%
  tidyr::pivot_longer(
    cols = starts_with("post_"),
    names_to = "q",
    values_to = "r"
  ) %>%
  mutate(r = round(r, digits = 2))

inc_post_platform

# Plot the table computed in this chunk: platform on the x axis, one dodged
# bar per income bracket. It is saved under its own file name so it does not
# overwrite the income x posting-frequency figure.
# NOTE(review): the caption reports the full-sample N, but these shares are
# computed among posters only -- confirm the intended caption.
inc_post_platform_plot <- inc_post_platform %>%
  ggplot(aes(fill = income, x = reorder(q, r), y = r)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.89)) +
  scale_y_continuous(labels = scales::percent) +
  # coord_flip() +
  theme_ipsum() +
  # Legend placement has to follow theme_ipsum(), which would otherwise
  # replace it.
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 2)) +
  labs(
    x = "  ",
    y = "   ",
    title = "Black Women's Social Media Platform Use by Income Levels",
    caption = "Black Women in Politics Survey (N= 2,284)\n April 2022",
    fill = ""
  ) +
  geom_text(
    aes(x = q, y = r, label = sprintf("%0.0f%%", r * 100)),
    size = 2.5,
    position = position_dodge(width = 0.89),
    colour = "White",
    vjust = 2
  )

inc_post_platform_plot

ggsave(
  filename = "inc_post_platform_plot.jpeg",
  plot = inc_post_platform_plot,
  width = 9,
  height = 7
)




```

