Loading, setting up

# Pull the modelling data set out of the report parameters.
# `[[` is used instead of `$` so the name must match exactly (no partial
# matching on the parameter list), and a missing parameter fails here with a
# clear message rather than later inside the first dplyr call.
d_to_model <- params[["d_to_model"]]
if (is.null(d_to_model)) {
  stop("Report parameter `d_to_model` was not supplied.", call. = FALSE)
}

State-level descriptives

# Reduce the modelling data to one row per state: distinct() keeps the first
# row it encounters for each `state` (all columns retained), after which the
# user-identifying columns are dropped.
# NOTE(review): presumably d_to_model has one row per user; if so, any
# remaining column that varies across users within a state holds only that
# first row's value -- confirm this is intended.
distinct_states <- select(
  distinct(d_to_model, state, .keep_all = TRUE),
  -user_id,
  -screen_name
)

Descriptives

# Summary statistics (missingness, mean, sd, quantiles, inline histogram) for
# the state-level variables used in the analysis below.
# The data are piped into skim() on purpose: skim() labels its output from the
# way it is called.
distinct_states %>%
  dplyr::select(
    n_tweets_at_state_level, n_tweets_by_user_in_state, voices,
    full_time_equivalent_fte_teachers_state_2015_16,
    c_ideology, g_ideology,
    pct_students_frpl, teacher_student_ratio, state_spending_per_child,
    time_of_account
  ) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 45
Number of columns 10
_______________________
Column type frequency:
numeric 10
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
n_tweets_at_state_level 0 1.00 13693.51 18977.36 318.00 2611.00 7237.00 17220.00 89588.00 ▇▂▁▁▁
n_tweets_by_user_in_state 0 1.00 32.91 42.99 1.00 3.00 28.00 40.00 261.00 ▇▁▁▁▁
voices 0 1.00 1903.71 2476.13 104.00 458.00 933.00 2177.00 12799.00 ▇▂▁▁▁
full_time_equivalent_fte_teachers_state_2015_16 0 1.00 67316.13 69165.03 7653.43 22702.01 50236.80 84181.03 347328.60 ▇▂▁▁▁
c_ideology 0 1.00 51.56 16.22 23.98 41.91 50.69 62.58 97.00 ▃▇▅▂▁
g_ideology 0 1.00 39.32 17.35 18.11 24.73 31.39 53.64 69.95 ▇▂▃▂▃
pct_students_frpl 1 0.98 0.48 0.10 0.28 0.41 0.47 0.57 0.75 ▃▇▅▆▁
teacher_student_ratio 0 1.00 15.56 2.99 11.82 13.56 15.06 16.87 23.63 ▇▇▃▁▂
state_spending_per_child 0 1.00 13.18 3.78 7.19 10.94 12.66 15.02 25.07 ▅▇▅▁▁
time_of_account 0 1.00 3.33 2.36 0.26 1.37 2.77 4.83 8.85 ▇▇▅▂▂

Correlations

# Pairwise Pearson correlations among the state-level variables, displayed as
# a lower triangle ordered by each variable's correlation with
# n_tweets_at_state_level (the outcome itself first, then strongest positive
# to strongest negative).
#
# The ordering is applied to the input columns *before* corrr::correlate().
# corrr::shave() blanks cells purely by position, so sorting only the rows of
# the correlation frame with arrange() misaligns rows and columns: some
# variable pairs disappear from the table while others are printed twice.
state_level_vars <- distinct_states %>% 
  select(n_tweets_at_state_level,
         n_tweets_by_user_in_state,
         voices, 
         full_time_equivalent_fte_teachers_state_2015_16,
         c_ideology,
         g_ideology,
         pct_students_frpl,
         teacher_student_ratio,
         state_spending_per_child,
         time_of_account
  )

# Correlation of every variable with the outcome, using the same method and
# missing-data handling that corrr::correlate() reports (pearson,
# pairwise.complete.obs). The outcome correlates 1 with itself, so it sorts
# to the front.
cor_with_outcome <- stats::cor(
  state_level_vars,
  state_level_vars$n_tweets_at_state_level,
  use = "pairwise.complete.obs"
)[, 1]

state_level_vars[order(cor_with_outcome, decreasing = TRUE)] %>% 
  corrr::correlate() %>% 
  corrr::shave() %>% 
  corrr::fashion() %>% 
  knitr::kable()
## 
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'
rowname n_tweets_at_state_level n_tweets_by_user_in_state voices full_time_equivalent_fte_teachers_state_2015_16 c_ideology g_ideology pct_students_frpl teacher_student_ratio state_spending_per_child time_of_account
voices .92
full_time_equivalent_fte_teachers_state_2015_16 .42 -.05
pct_students_frpl .22 -.06 .25
teacher_student_ratio .08 -.04 .07 .15
n_tweets_by_user_in_state -.00 -.05 -.05 -.01
time_of_account -.04 .09 -.01 .11 -.06 -.09
state_spending_per_child -.18 .04 -.09 .10 .52 .57 -.50
c_ideology -.24 -.01 -.11 .17 .77 -.16 -.12
g_ideology -.27 -.04 -.18 .05 .77 -.31 -.08 .57
n_tweets_at_state_level -.00 .92 .42 -.24 -.27 .22 .08 -.18 -.04

Analysis - n tweets at state level

Poisson regression models

# Add a z-scored (`*_std`) copy of each state-level predictor used in the
# models below. scale() returns a one-column matrix, so as.vector() is applied
# to store plain numeric columns. Work is done inside local() so the helper
# and loop variable do not leak into the session.
distinct_states <- local({
  z_score <- function(x) as.vector(scale(x))
  states <- distinct_states

  # Free/reduced-price lunch share is converted to a percentage before scaling.
  states$pct_students_frpl_std <- z_score(states$pct_students_frpl * 100)

  # Remaining predictors are standardized as-is, in this order.
  for (predictor in c("voices",
                      "full_time_equivalent_fte_teachers_state_2015_16",
                      "c_ideology",
                      "g_ideology",
                      "teacher_student_ratio",
                      "state_spending_per_child")) {
    states[[paste0(predictor, "_std")]] <- z_score(states[[predictor]])
  }

  states
})
# Poisson regression (glm, family = "poisson") of the state-level tweet count
# on the standardized state-level predictors, fit on distinct_states.
# NOTE(review): the descriptives show an outcome SD (~18,977) larger than its
# mean (~13,694), and the intervals reported for this model are extremely
# narrow; overdispersion looks likely -- consider checking a quasi-Poisson or
# negative binomial fit before interpreting the p-values.
m_state_level <- glm(n_tweets_at_state_level ~ 1 + 
                       
                       voices_std + 
                       full_time_equivalent_fte_teachers_state_2015_16_std +
                       
                       c_ideology_std + 
                       g_ideology_std + 
                       
                       pct_students_frpl_std + 
                       teacher_student_ratio_std + 
                       state_spending_per_child_std,
                     data = distinct_states, 
                     family = "poisson")

# Coefficient table for the model; show.std = "std" adds the standardized
# coefficients and their intervals alongside the incidence rate ratios.
sjPlot::tab_model(m_state_level, show.std = "std")
  n tweets at state level
Predictors Incidence Rate Ratios std. Beta CI standardized CI p
(Intercept) 9049.12 9.11 9018.47 – 9079.83 9.11 – 9.11 <0.001
voices_std 2.71 1.00 2.70 – 2.72 1.00 – 1.01 <0.001
full_time_equivalent_fte_teachers_state_2015_16_std 0.61 -0.51 0.60 – 0.61 -0.51 – -0.50 <0.001
c_ideology_std 1.08 0.08 1.08 – 1.09 0.08 – 0.09 <0.001
g_ideology_std 0.80 -0.22 0.80 – 0.80 -0.23 – -0.22 <0.001
pct_students_frpl_std 1.01 0.01 1.00 – 1.01 0.00 – 0.01 <0.001
teacher_student_ratio_std 1.43 0.33 1.43 – 1.44 0.33 – 0.34 <0.001
state_spending_per_child_std 1.43 0.35 1.42 – 1.43 0.34 – 0.35 <0.001
Observations 44
R2 Nagelkerke 1.000
# Visual diagnostic checks for the full state-level model.
performance::check_model(m_state_level)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 5 rows containing missing values (geom_text_repel).

# Average marginal effects of each predictor in the full state-level model.
margins::margins(m_state_level)
## Average marginal effects
## glm(formula = n_tweets_at_state_level ~ 1 + voices_std + full_time_equivalent_fte_teachers_state_2015_16_std +     c_ideology_std + g_ideology_std + pct_students_frpl_std +     teacher_student_ratio_std + state_spending_per_child_std,     family = "poisson", data = distinct_states)
##  voices_std full_time_equivalent_fte_teachers_state_2015_16_std c_ideology_std
##       13961                                               -7023           1140
##  g_ideology_std pct_students_frpl_std teacher_student_ratio_std
##           -3112                 108.2                      5021
##  state_spending_per_child_std
##                          4976

Without voices - use this one

# Poisson regression of the state-level tweet count on the standardized
# state-level predictors, with voices_std deliberately left out (kept below as
# a commented-out term).
# NOTE(review): the descriptives show an outcome SD (~18,977) larger than its
# mean (~13,694), and the intervals reported for this model are extremely
# narrow; overdispersion looks likely -- consider checking a quasi-Poisson or
# negative binomial fit before interpreting the p-values.
m_state_level_no_voices <- glm(n_tweets_at_state_level ~ 1 + 
                       
                       # voices_std + 
                       full_time_equivalent_fte_teachers_state_2015_16_std +
                       
                       c_ideology_std + 
                       g_ideology_std + 
                       
                       pct_students_frpl_std + 
                       teacher_student_ratio_std + 
                       state_spending_per_child_std,
                     data = distinct_states, 
                     family = "poisson")

# Coefficient table for the model; show.std = "std" adds the standardized
# coefficients and their intervals alongside the incidence rate ratios.
sjPlot::tab_model(m_state_level_no_voices, show.std = "std")
  n tweets at state level
Predictors Incidence Rate Ratios std. Beta CI standardized CI p
(Intercept) 11486.41 9.34 11452.45 – 11520.42 9.33 – 9.34 <0.001
full_time_equivalent_fte_teachers_state_2015_16_std 1.46 0.38 1.45 – 1.46 0.38 – 0.38 <0.001
c_ideology_std 0.69 -0.38 0.68 – 0.69 -0.39 – -0.38 <0.001
g_ideology_std 0.83 -0.19 0.82 – 0.83 -0.20 – -0.19 <0.001
pct_students_frpl_std 1.04 0.04 1.04 – 1.04 0.04 – 0.04 <0.001
teacher_student_ratio_std 1.12 0.11 1.12 – 1.13 0.10 – 0.11 <0.001
state_spending_per_child_std 1.03 0.03 1.02 – 1.03 0.02 – 0.03 <0.001
Observations 44
R2 Nagelkerke 1.000
# Visual diagnostic checks for the model without voices_std.
performance::check_model(m_state_level_no_voices)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_text_repel).

# Average marginal effects of each predictor in the model without voices_std.
margins::margins(m_state_level_no_voices)
## Average marginal effects
## glm(formula = n_tweets_at_state_level ~ 1 + full_time_equivalent_fte_teachers_state_2015_16_std +     c_ideology_std + g_ideology_std + pct_students_frpl_std +     teacher_student_ratio_std + state_spending_per_child_std,     family = "poisson", data = distinct_states)
##  full_time_equivalent_fte_teachers_state_2015_16_std c_ideology_std
##                                                 5272          -5291
##  g_ideology_std pct_students_frpl_std teacher_student_ratio_std
##           -2672                 562.4                      1615
##  state_spending_per_child_std
##                         378.3