I am trying to write an R script for text analytics. The script should analyze the comment text in the following ways:
- word frequency analysis
- sentiment analysis
- visualization of sentiment over time
library(tidyverse) # For data manipulation
library(tidyr) # For data tidying
library(dplyr) # For data manipulation
library(ggplot2) # For data visualization
library(tm) # For text mining
library(wordcloud) # For word clouds
library(stringr) # For string manipulation
library(sentimentr) # For sentiment analysis
library(lubridate) # For date manipulation
# Word frequency analysis ----
# Normalise the text before tokenising so that "Good", "good," and "good"
# are counted as the same word: lower-case everything, replace punctuation
# (except apostrophes, so contractions survive) with spaces, and collapse
# repeated whitespace.
clean_comments <- data$comments %>%
  str_to_lower() %>%
  str_replace_all("[^[:alnum:][:space:]']", " ") %>%
  str_squish()

# Split comments into words (a list with one character vector per comment)
words <- str_split(clean_comments, "\\s+")

# Flatten to a single vector, then drop missing comments, empty tokens and
# common English stop words, which would otherwise dominate the counts.
all_words <- unlist(words)
all_words <- all_words[!is.na(all_words) & all_words != ""]
all_words <- all_words[!all_words %in% stopwords("en")]

# Create a table of word frequencies
word_frequencies <- table(all_words)

# Sort and display top 20 words
sorted_word_frequencies <- sort(word_frequencies, decreasing = TRUE)
head(sorted_word_frequencies, 20)

# Draw the word cloud only when there is something to plot; `freq` is passed
# as a plain numeric vector rather than a table object.
if (length(sorted_word_frequencies) > 0) {
  wordcloud(
    words = names(sorted_word_frequencies),
    freq = as.vector(sorted_word_frequencies),
    max.words = 100,
    random.order = FALSE
  )
}
# Sentiment analysis ----
# sentiment() returns one row per *sentence*, so its result has more rows
# than `data` whenever a comment contains several sentences and cannot be
# assigned back as a column. sentiment_by() averages the sentence scores
# per input element, giving exactly one score per comment, in input order.
sentiments <- sentiment_by(get_sentences(as.character(data$comments)))
data$sentiment_score <- sentiments$ave_sentiment
# Average sentiment per market / region / branch.
# `.groups = "drop"` returns an ungrouped tibble, so later verbs applied to
# `aggregate_sentiment` do not silently operate per (market, region) group,
# and the "summarise() has grouped output" message is avoided.
aggregate_sentiment <- data %>%
  group_by(market, region, branch) %>%
  summarise(
    avg_sentiment = mean(sentiment_score, na.rm = TRUE),
    .groups = "drop"
  )
# Sentiment over time ----
# (This section appeared twice in the script; one copy is enough.)

# Convert to Date object if necessary.
# NOTE(review): as.Date() on character input expects an unambiguous format
# such as "YYYY-MM-DD"; pass `format =` if the raw dates differ -- confirm.
data$date <- as.Date(data$date)

# Mean sentiment of all comments sharing the same date
sentiment_over_time <- data %>%
  group_by(date) %>%
  summarise(avg_sentiment = mean(sentiment_score, na.rm = TRUE))

# Store the plot and print it explicitly: a bare ggplot expression is not
# rendered when the script is executed with source().
sentiment_plot <- ggplot(
  sentiment_over_time,
  aes(x = date, y = avg_sentiment)
) +
  geom_line() +
  labs(x = "Date", y = "Average sentiment")
print(sentiment_plot)
# Most positive and most negative comments ----
# arrange() always places NA last, so without the filter the "bottom 10"
# would be comments with no score rather than the most negative ones.
top_comments <- data %>%
  filter(!is.na(sentiment_score)) %>%
  arrange(desc(sentiment_score)) %>%
  select(name, comments, sentiment_score)

# Ten highest-scoring comments, most positive first
top_positive <- head(top_comments, 10)

# Ten lowest-scoring comments, re-sorted so the most negative comes first
top_negative <- top_comments %>%
  tail(10) %>%
  arrange(sentiment_score)
# Keyword mentions per market / region ----
keywords <- c("service", "quality", "delivery")

# Match any of the keywords, ignoring case so that "Service" or "DELIVERY"
# are detected as well.
keyword_pattern <- regex(paste(keywords, collapse = "|"), ignore_case = TRUE)

# Flag each comment: "Yes" if it mentions at least one keyword, "No" if not
# (NA comments stay NA and are dropped by the filter below).
data$keyword <- ifelse(str_detect(data$comments, keyword_pattern), "Yes", "No")

# Number of keyword-mentioning comments per market and region.
# Note: in the result, the column `keyword` holds this *count*, not the
# Yes/No flag of the same name in `data`.
keyword_frequency <- data %>%
  filter(keyword == "Yes") %>%
  count(market, region, name = "keyword")
This is the output I was expecting: