Using quanteda’s fcm() and textplot_network(), you can perform visual analysis of social media posts in terms of co-occurrences of hashtags or usernames in a few steps. The dataset for this example contains only 10,000 Twitter posts, but you can easily analyze more than one million posts on your laptop computer.

library(quanteda)

Load sample data

# Restore the objects saved in the .rda file into the workspace; the steps
# below expect it to provide `data_corpus_tweets`.
load("data/data_corpus_tweets.rda")

Construct a document-feature matrix of Twitter posts

# Tokenize first, then build the document-feature matrix. Calling dfm()
# directly on a corpus and passing remove_punct to it is deprecated since
# quanteda 3.0 (and no longer works in 4.0); punctuation removal belongs to
# tokens(). Hashtags (#...) and usernames (@...) must survive as single
# tokens for the selections further down to work.
tweet_toks <- tokens(data_corpus_tweets, remove_punct = TRUE)
tweet_dfm <- dfm(tweet_toks)
# Preview the first six documents
head(tweet_dfm)
## Document-feature matrix of: 6 documents, 42,327 features (100% sparse).

Hashtags

Extract most common hashtags

# Keep only the features that match the hashtag glob pattern
tag_dfm <- dfm_select(tweet_dfm, pattern = "#*")
# Names of the 50 most frequent hashtags
toptag <- names(topfeatures(tag_dfm, n = 50))
head(toptag)
## [1] "#ep2014"       "#salvini"      "#fdian"        "#ukip"        
## [5] "#caraacaratve" "#alzalatesta"

Construct feature co-occurrence matrix of hashtags

# Build the feature co-occurrence matrix (fcm) from the hashtag-only dfm
tag_fcm <- fcm(tag_dfm)
# Preview the top-left 6 x 6 corner of the matrix
head(tag_fcm)
## Feature co-occurrence matrix of: 6 by 6 features.
## 6 x 6 sparse Matrix of class "fcm"
##                  features
## features          #pomeriggio5 #canale5 #miaou #iovotoitaliano #fdian
##   #pomeriggio5               0        2      0               0      0
##   #canale5                   0        0      0               0      1
##   #miaou                     0        0      0               0      0
##   #iovotoitaliano            0        0      0               0     60
##   #fdian                     0        0      0               0      0
##   #espanyaensroba            0        0      0               0      0
##                  features
## features          #espanyaensroba
##   #pomeriggio5                  0
##   #canale5                      0
##   #miaou                        0
##   #iovotoitaliano               0
##   #fdian                        0
##   #espanyaensroba               0
# Restrict the co-occurrence matrix to the most common hashtags
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)
# Draw the hashtag co-occurrence network.
# NOTE(review): in quanteda >= 3.0 textplot_network() is provided by the
# separate quanteda.textplots package -- confirm it is loaded.
textplot_network(
  topgat_fcm,
  min_freq = 0.1,
  edge_alpha = 0.8,
  edge_size = 5
)

Usernames

Extract most frequently mentioned usernames

# Keep only the features that match the username glob pattern
user_dfm <- dfm_select(tweet_dfm, pattern = "@*")
# Names of the 50 most frequently mentioned usernames
topuser <- names(topfeatures(user_dfm, n = 50))
head(topuser)
## [1] "@pablo_iglesias_" "@elenavalenciano" "@canete2014_"    
## [4] "@nigel_farage"    "@martinschulz"    "@mlp_officiel"

Construct feature co-occurrence matrix of usernames

# Build the feature co-occurrence matrix (fcm) from the username-only dfm
user_fcm <- fcm(user_dfm)
# Preview the top-left 6 x 6 corner of the matrix
head(user_fcm)
## Feature co-occurrence matrix of: 6 by 6 features.
## 6 x 6 sparse Matrix of class "fcm"
##                   features
## features           @pacomarhuenda @pablo_iglesias_ @kopriths @gapatzhs
##   @pacomarhuenda                0                1         0         0
##   @pablo_iglesias_              0                0         0         0
##   @kopriths                     0                0         0         1
##   @gapatzhs                     0                0         0         0
##   @mariaspyraki                 0                0         0         0
##   @ernesturtasun                0                0         0         0
##                   features
## features           @mariaspyraki @ernesturtasun
##   @pacomarhuenda               0              0
##   @pablo_iglesias_             0              1
##   @kopriths                    1              0
##   @gapatzhs                    1              0
##   @mariaspyraki                0              0
##   @ernesturtasun               0              0
# Restrict the co-occurrence matrix to the most mentioned usernames
# (this overwrites the full user_fcm with the reduced one)
user_fcm <- fcm_select(user_fcm, pattern = topuser)
# Draw the username co-occurrence network with orange edges.
# NOTE(review): in quanteda >= 3.0 textplot_network() is provided by the
# separate quanteda.textplots package -- confirm it is loaded.
textplot_network(
  user_fcm,
  min_freq = 0.1,
  edge_color = "orange",
  edge_alpha = 0.8,
  edge_size = 5
)