Using quanteda’s fcm() and textplot_network(), you can perform visual analysis of social media posts in terms of co-occurrences of hashtags or usernames in a few steps. The dataset for this example contains only 10,000 Twitter posts, but you can easily analyze more than one million posts on your laptop computer.

Hashtags

library(quanteda)

Load sample data

# Restore the saved sample objects into the current environment; the
# following step expects this to provide `data_corpus_tweets`.
load(file = "data/data_corpus_tweets.rda")

Construct a document-feature matrix of Twitter posts

# Tokenize explicitly, then build the document-feature matrix. Passing a
# corpus straight to dfm() is deprecated since quanteda 3.0 and defunct in
# 4.0, so the tokenizer option (remove_punct) now goes to tokens().
# NOTE(review): tokens() is expected to keep the leading "#" / "@" of
# social media tags, which the later "#*" and "@*" selections rely on;
# confirm on the installed quanteda version.
tweet_dfm <- dfm(tokens(data_corpus_tweets, remove_punct = TRUE))

# Preview the first documents of the matrix.
head(tweet_dfm)
## Document-feature matrix of: 6 documents, 42,327 features (100% sparse).

Extract most common hashtags

# Keep only the features that match the glob "#*", i.e. the hashtags.
tag_dfm <- dfm_select(tweet_dfm, pattern = "#*")

# Names of the 50 most frequent hashtags.
toptag <- names(topfeatures(tag_dfm, n = 50))
head(toptag)
## [1] "#ep2014"       "#salvini"      "#fdian"        "#ukip"        
## [5] "#caraacaratve" "#alzalatesta"

Construct feature co-occurrence matrix of hashtags

# Turn the hashtag-only dfm into a feature co-occurrence matrix.
tag_fcm <- fcm(x = tag_dfm)

# Preview the first rows of the co-occurrence matrix.
head(tag_fcm)
## Feature co-occurrence matrix of: 6 by 6 features.
## 6 x 6 sparse Matrix of class "fcm"
##                  features
## features          #pomeriggio5 #canale5 #miaou #iovotoitaliano #fdian
##   #pomeriggio5               0        2      0               0      0
##   #canale5                   0        0      0               0      1
##   #miaou                     0        0      0               0      0
##   #iovotoitaliano            0        0      0               0     60
##   #fdian                     0        0      0               0      0
##   #espanyaensroba            0        0      0               0      0
##                  features
## features          #espanyaensroba
##   #pomeriggio5                  0
##   #canale5                      0
##   #miaou                        0
##   #iovotoitaliano               0
##   #fdian                        0
##   #espanyaensroba               0
# Restrict the co-occurrence matrix to the most frequent hashtags.
# NOTE(review): `topgat_fcm` looks like a typo for `toptag_fcm`; the name is
# kept unchanged here in case other code refers to it.
topgat_fcm <- fcm_select(tag_fcm, pattern = toptag)

# Plot the hashtag co-occurrence network.
# NOTE(review): with quanteda >= 3.0, textplot_network() is provided by the
# quanteda.textplots package; confirm against the installed version.
textplot_network(
  topgat_fcm,
  min_freq = 0.1,
  edge_alpha = 0.8,
  edge_size = 5
)

Usernames

Extract most frequently mentioned usernames

# Keep only the features that match the glob "@*", i.e. the usernames.
user_dfm <- dfm_select(tweet_dfm, pattern = "@*")

# Names of the 50 most frequently mentioned usernames.
topuser <- names(topfeatures(user_dfm, n = 50))
head(topuser)
## [1] "@pablo_iglesias_" "@elenavalenciano" "@canete2014_"    
## [4] "@nigel_farage"    "@martinschulz"    "@mlp_officiel"

Construct feature co-occurrence matrix of usernames

# Turn the username-only dfm into a feature co-occurrence matrix.
user_fcm <- fcm(x = user_dfm)

# Preview the first rows of the co-occurrence matrix.
head(user_fcm)
## Feature co-occurrence matrix of: 6 by 6 features.
## 6 x 6 sparse Matrix of class "fcm"
##                   features
## features           @pacomarhuenda @pablo_iglesias_ @kopriths @gapatzhs
##   @pacomarhuenda                0                1         0         0
##   @pablo_iglesias_              0                0         0         0
##   @kopriths                     0                0         0         1
##   @gapatzhs                     0                0         0         0
##   @mariaspyraki                 0                0         0         0
##   @ernesturtasun                0                0         0         0
##                   features
## features           @mariaspyraki @ernesturtasun
##   @pacomarhuenda               0              0
##   @pablo_iglesias_             0              1
##   @kopriths                    1              0
##   @gapatzhs                    1              0
##   @mariaspyraki                0              0
##   @ernesturtasun               0              0
# Restrict the co-occurrence matrix to the most frequently mentioned
# usernames (this overwrites the full `user_fcm`).
user_fcm <- fcm_select(user_fcm, pattern = topuser)

# Plot the username co-occurrence network with orange edges.
# NOTE(review): with quanteda >= 3.0, textplot_network() is provided by the
# quanteda.textplots package; confirm against the installed version.
textplot_network(
  user_fcm,
  min_freq = 0.1,
  edge_color = "orange",
  edge_alpha = 0.8,
  edge_size = 5
)