init
This commit is contained in:
commit
8d3c8b3974
77 changed files with 682928 additions and 0 deletions
512
ergebnisse_hackathon_repo/team-16/r-scripts/.Rhistory
Normal file
512
ergebnisse_hackathon_repo/team-16/r-scripts/.Rhistory
Normal file
|
|
@ -0,0 +1,512 @@
|
|||
pm_csv <- c(pm_csv, str_c("data/2021-1-", 1:31, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-2-", 1:28, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-3-", 1:31, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-4-", 1:30, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-5-", 1:21, "_presseportal.csv"))
|
||||
pm_list <- lapply(pm_csv, read_csv)
|
||||
pm_list <- lapply(pm_csv, read_csv)
|
||||
pm <- do.call(rbind, pm_list)
|
||||
pm_demo <- read_csv("data/copbird_table_pm_topiced_demonstr.csv")
|
||||
tw_demo <- read_csv("data/copbird_table_tweet_topiced_demonstr.csv")
|
||||
pm_text <- pm$content
|
||||
pm_text <- pm_text[-which(is.na(pm_text))] # remove missing values
|
||||
length(grep("(ots)", pm_text)) == length(pm_text) # every report contains "ots"
|
||||
length(grep("(ots)", pm_text)) == length(pm_text) # every report contains "ots"
|
||||
pm_text <- gsub("^(.*?\\(ots\\) - )", "", pm_text, perl = TRUE) # remove <Ort (ots) - >
|
||||
pm_text <- gsub("^(.*?\\(ots\\) - )", "", pm_text, perl = TRUE) # remove <Ort (ots) - >
|
||||
pm_text <- gsub("( \\.\\.\\.)$", "", pm_text) # remove < ...>
|
||||
pm_text <- gsub("( \\.\\.\\.)$", "", pm_text) # remove < ...>
|
||||
content_ber <- rep(NA, nrow(pm))
|
||||
content_ber <- rep(NA, nrow(pm))
|
||||
content_ber[which(!is.na(pm$content))] <- pm_text
|
||||
content_ber[which(!is.na(pm$content))] <- pm_text
|
||||
pm <- cbind(pm, content_ber)
|
||||
pm_text <- gsub("[^[:alnum:] ]", "", pm_text)
|
||||
pm_text <- gsub("[^[:alnum:] ]", "", pm_text)
|
||||
content_ber_satzzeichen <- rep(NA, nrow(pm))
|
||||
content_ber_satzzeichen <- rep(NA, nrow(pm))
|
||||
content_ber_satzzeichen[which(!is.na(pm$content))] <- pm_text
|
||||
pm <- cbind(pm, content_ber_satzzeichen)
|
||||
head(pm)
|
||||
pm_text <- pm_demo$content
|
||||
pm_text <- gsub("^(.*?\\(ots\\) - )", "", pm_text, perl = TRUE) # remove <Ort (ots) - >
|
||||
pm_text <- gsub("( \\.\\.\\.)$", "", pm_text) # remove < ...>
|
||||
content_ber <- rep(NA, nrow(pm_demo))
|
||||
content_ber[which(!is.na(pm_demo$content))] <- pm_text
|
||||
pm_demo <- cbind(pm_demo, content_ber)
|
||||
pm_text <- gsub("[^[:alnum:] ]", "", pm_text)
|
||||
content_ber_satzzeichen <- rep(NA, nrow(pm_demo))
|
||||
content_ber_satzzeichen[which(!is.na(pm_demo$content))] <- pm_text
|
||||
pm_demo <- cbind(pm_demo, content_ber_satzzeichen)
|
||||
head(pm_demo)
|
||||
readAndflattenSentiWS <- function(filename) {
  # Read a SentiWS dictionary file and flatten it into a plain,
  # lower-cased character vector of words.
  raw_lines <- readLines(filename, encoding = "UTF-8")
  # Replace the "|POS<tab>weight<tab>" annotation with a comma so the
  # head word and its inflected forms become one comma-separated list.
  flattened <- sub("\\|[A-Z]+\t[0-9.-]+\t?", ",", raw_lines)
  tolower(unlist(strsplit(flattened, ",")))
}
|
||||
pos.words <- c(scan("data/positive-words.txt",what='character', comment.char=';', quiet=T),
|
||||
readAndflattenSentiWS("data/positive-words.txt"))
|
||||
neg.words <- c(scan("data/negative-words.txt",what='character', comment.char=';', quiet=T),
|
||||
readAndflattenSentiWS("data/negative-words.txt"))
|
||||
# Score each sentence by counting dictionary hits:
#   score = (# words found in pos.words) - (# words found in neg.words).
#
# sentences : character vector of texts to score.
# pos.words : character vector of positive sentiment words.
# neg.words : character vector of negative sentiment words.
# .progress : retained for backward compatibility with the old
#             plyr::laply interface; ignored here.
#
# Returns a data.frame with columns `score` (numeric) and `text`.
#
# Fix: the original called plyr::laply while `require(plyr)` was
# commented out, so it errored at runtime. This version uses only
# base R (vapply / strsplit / %in%), no plyr or stringr needed.
score.sentiment <- function(sentences, pos.words, neg.words, .progress = "none") {
  scores <- vapply(sentences, function(sentence) {
    # Clean up: drop punctuation, control characters and digits,
    # then normalise to lower case.
    sentence <- gsub("[[:punct:]]", "", sentence)
    sentence <- gsub("[[:cntrl:]]", "", sentence)
    sentence <- gsub("\\d+", "", sentence)
    sentence <- tolower(sentence)
    # Split on whitespace into individual words.
    words <- unlist(strsplit(sentence, "\\s+"))
    # %in% yields TRUE/FALSE, which sum() treats as 1/0.
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }, numeric(1), USE.NAMES = FALSE)
  data.frame(score = scores, text = sentences)
}
|
||||
# Score each sentence by counting dictionary hits:
#   score = (# words found in pos.words) - (# words found in neg.words).
#
# sentences : character vector of texts to score.
# pos.words : character vector of positive sentiment words.
# neg.words : character vector of negative sentiment words.
# .progress : retained for backward compatibility; ignored here.
#
# Returns a data.frame with columns `score` (numeric) and `text`.
#
# Fix: the original passed `.progress = .progress` to base lapply(),
# which has no such argument ("unused argument" error), and the list
# result would not form a proper `score` column. vapply() returns a
# plain numeric vector and needs no extra packages.
score.sentiment <- function(sentences, pos.words, neg.words, .progress = "none") {
  scores <- vapply(sentences, function(sentence) {
    # Clean up: drop punctuation, control characters and digits,
    # then normalise to lower case.
    sentence <- gsub("[[:punct:]]", "", sentence)
    sentence <- gsub("[[:cntrl:]]", "", sentence)
    sentence <- gsub("\\d+", "", sentence)
    sentence <- tolower(sentence)
    # Split on whitespace into individual words.
    words <- unlist(strsplit(sentence, "\\s+"))
    # %in% yields TRUE/FALSE, which sum() treats as 1/0.
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }, numeric(1), USE.NAMES = FALSE)
  data.frame(score = scores, text = sentences)
}
|
||||
score_pm_demo <- score.sentiment(pm_demo$content, pos.words, neg.words)
|
||||
score_tw_demo <- score.sentiment(tw_demo$tweet_text, pos.words, neg.words)
|
||||
ggplot(score_pm_demo) +
|
||||
geom_bar(aes(x = score), fill = "blue") +
|
||||
labs(title = "Topic: Demonstrationen", subtitle = "Sentiment-Analyse der Pressemeldungen") +
|
||||
theme_minimal()
|
||||
# Score each sentence by counting dictionary hits:
#   score = (# words found in pos.words) - (# words found in neg.words).
#
# sentences : character vector of texts to score.
# pos.words : character vector of positive sentiment words.
# neg.words : character vector of negative sentiment words.
# .progress : retained for backward compatibility; ignored here.
#
# Returns a data.frame with columns `score` (numeric) and `text`.
#
# Fix: base lapply() has no `.progress` argument (the original call
# errored with "unused argument"), and a list result would break the
# `score` column of the returned data.frame. vapply() fixes both.
score.sentiment <- function(sentences, pos.words, neg.words, .progress = "none") {
  scores <- vapply(sentences, function(sentence) {
    # Clean up: drop punctuation, control characters and digits,
    # then normalise to lower case.
    sentence <- gsub("[[:punct:]]", "", sentence)
    sentence <- gsub("[[:cntrl:]]", "", sentence)
    sentence <- gsub("\\d+", "", sentence)
    sentence <- tolower(sentence)
    # Split on whitespace into individual words.
    words <- unlist(strsplit(sentence, "\\s+"))
    # %in% yields TRUE/FALSE, which sum() treats as 1/0.
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }, numeric(1), USE.NAMES = FALSE)
  data.frame(score = scores, text = sentences)
}
|
||||
score_pm_demo <- score.sentiment(pm_demo$content, pos.words, neg.words)
|
||||
library(plyr)
|
||||
# Score each sentence by counting dictionary hits:
#   score = (# words found in pos.words) - (# words found in neg.words).
#
# sentences : character vector of texts to score.
# pos.words : character vector of positive sentiment words.
# neg.words : character vector of negative sentiment words.
# .progress : retained for backward compatibility; ignored here.
#
# Returns a data.frame with columns `score` (numeric) and `text`.
#
# Fix: loading plyr did not help — the body still called base
# lapply() with a `.progress` argument it does not accept, which
# errors with "unused argument". vapply() returns a plain numeric
# vector and removes the plyr/stringr dependency entirely.
score.sentiment <- function(sentences, pos.words, neg.words, .progress = "none") {
  scores <- vapply(sentences, function(sentence) {
    # Clean up: drop punctuation, control characters and digits,
    # then normalise to lower case.
    sentence <- gsub("[[:punct:]]", "", sentence)
    sentence <- gsub("[[:cntrl:]]", "", sentence)
    sentence <- gsub("\\d+", "", sentence)
    sentence <- tolower(sentence)
    # Split on whitespace into individual words.
    words <- unlist(strsplit(sentence, "\\s+"))
    # %in% yields TRUE/FALSE, which sum() treats as 1/0.
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }, numeric(1), USE.NAMES = FALSE)
  data.frame(score = scores, text = sentences)
}
|
||||
score_pm_demo <- score.sentiment(pm_demo$content, pos.words, neg.words)
|
||||
# Score each sentence by counting dictionary hits:
#   score = (# words found in pos.words) - (# words found in neg.words).
#
# sentences : character vector of texts to score.
# pos.words : character vector of positive sentiment words.
# neg.words : character vector of negative sentiment words.
# .progress : retained for backward compatibility; ignored here.
#
# Returns a data.frame with columns `score` (numeric) and `text`.
#
# Fix: the original called base lapply() with a `.progress` argument
# it does not accept ("unused argument" error); even without it, the
# list result would break the `score` column. vapply() fixes both
# and needs no plyr/stringr.
score.sentiment <- function(sentences, pos.words, neg.words, .progress = "none") {
  scores <- vapply(sentences, function(sentence) {
    # Clean up: drop punctuation, control characters and digits,
    # then normalise to lower case.
    sentence <- gsub("[[:punct:]]", "", sentence)
    sentence <- gsub("[[:cntrl:]]", "", sentence)
    sentence <- gsub("\\d+", "", sentence)
    sentence <- tolower(sentence)
    # Split on whitespace into individual words.
    words <- unlist(strsplit(sentence, "\\s+"))
    # %in% yields TRUE/FALSE, which sum() treats as 1/0.
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }, numeric(1), USE.NAMES = FALSE)
  data.frame(score = scores, text = sentences)
}
|
||||
score_pm_demo <- score.sentiment(pm_demo$content, pos.words, neg.words)
|
||||
# Score each sentence by counting dictionary hits:
#   score = (# words found in pos.words) - (# words found in neg.words).
#
# sentences : character vector of texts to score.
# pos.words : character vector of positive sentiment words.
# neg.words : character vector of negative sentiment words.
# .progress : retained for backward compatibility with the old
#             plyr::laply interface; ignored here.
#
# Returns a data.frame with columns `score` (numeric) and `text`.
#
# Improvement: `require()` was misused to load hard dependencies
# (it returns FALSE instead of erroring when a package is missing).
# plyr::laply and stringr::str_split are replaced by base vapply()
# and strsplit(), which produce identical results with no extra
# packages.
score.sentiment <- function(sentences, pos.words, neg.words, .progress = "none") {
  scores <- vapply(sentences, function(sentence) {
    # Clean up: drop punctuation, control characters and digits,
    # then normalise to lower case.
    sentence <- gsub("[[:punct:]]", "", sentence)
    sentence <- gsub("[[:cntrl:]]", "", sentence)
    sentence <- gsub("\\d+", "", sentence)
    sentence <- tolower(sentence)
    # Split on whitespace into individual words.
    words <- unlist(strsplit(sentence, "\\s+"))
    # %in% yields TRUE/FALSE, which sum() treats as 1/0.
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }, numeric(1), USE.NAMES = FALSE)
  data.frame(score = scores, text = sentences)
}
|
||||
score_pm_demo <- score.sentiment(pm_demo$content, pos.words, neg.words)
|
||||
score_tw_demo <- score.sentiment(tw_demo$tweet_text, pos.words, neg.words)
|
||||
ggplot(score_pm_demo) +
|
||||
geom_bar(aes(x = score), fill = "blue") +
|
||||
labs(title = "Topic: Demonstrationen", subtitle = "Sentiment-Analyse der Pressemeldungen") +
|
||||
theme_minimal()
|
||||
ggplot(score_tw_demo) +
|
||||
geom_bar(aes(x = score), fill = "blue") +
|
||||
labs(title = "Topic: Demonstrationen", subtitle = "Sentiment-Analyse der Tweets") +
|
||||
theme_minimal()
|
||||
View(score_tw_demo)
|
||||
knitr::opts_chunk$set(echo = TRUE)
|
||||
library(tidyverse)
|
||||
library(stringi)
|
||||
pm_csv <- str_c("data/2020-12-", 1:26, "_presseportal.csv")
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-1-", 1:31, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-2-", 1:28, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-3-", 1:31, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-4-", 1:30, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-5-", 1:21, "_presseportal.csv"))
|
||||
pm_list <- lapply(pm_csv, read_csv)
|
||||
pm <- do.call(rbind, pm_list)
|
||||
pm_demo <- read_csv("data/copbird_table_pm_topiced_demonstr.csv")
|
||||
tw_demo <- read_csv("data/copbird_table_tweet_topiced_demonstr.csv")
|
||||
pm_text <- pm$content
|
||||
pm_text <- pm_text[-which(is.na(pm_text))] # remove missing values
|
||||
length(grep("(ots)", pm_text)) == length(pm_text) # every report contains "ots"
|
||||
pm_text <- gsub("^(.*?\\(ots\\) - )", "", pm_text, perl = TRUE) # remove <Ort (ots) - >
|
||||
pm_text <- gsub("( \\.\\.\\.)$", "", pm_text) # remove < ...>
|
||||
content_ber <- rep(NA, nrow(pm))
|
||||
content_ber[which(!is.na(pm$content))] <- pm_text
|
||||
pm <- cbind(pm, content_ber)
|
||||
pm_text <- gsub("[^[:alnum:] ]", "", pm_text)
|
||||
content_ber_satzzeichen <- rep(NA, nrow(pm))
|
||||
content_ber_satzzeichen[which(!is.na(pm$content))] <- pm_text
|
||||
pm <- cbind(pm, content_ber_satzzeichen)
|
||||
head(pm)
|
||||
# csvpath <- <your path>
|
||||
# write_csv(pm, str_c(csvpath, "/pressemeldungen.csv"))
|
||||
pm_text <- pm_demo$content
|
||||
pm_text <- gsub("^(.*?\\(ots\\) - )", "", pm_text, perl = TRUE) # remove <Ort (ots) - >
|
||||
pm_text <- gsub("( \\.\\.\\.)$", "", pm_text) # remove < ...>
|
||||
content_ber <- rep(NA, nrow(pm_demo))
|
||||
content_ber[which(!is.na(pm_demo$content))] <- pm_text
|
||||
pm_demo <- cbind(pm_demo, content_ber)
|
||||
pm_text <- gsub("[^[:alnum:] ]", "", pm_text)
|
||||
content_ber_satzzeichen <- rep(NA, nrow(pm_demo))
|
||||
content_ber_satzzeichen[which(!is.na(pm_demo$content))] <- pm_text
|
||||
pm_demo <- cbind(pm_demo, content_ber_satzzeichen)
|
||||
head(pm_demo)
|
||||
readAndflattenSentiWS <- function(filename) {
|
||||
words = readLines(filename, encoding="UTF-8")
|
||||
words <- sub("\\|[A-Z]+\t[0-9.-]+\t?", ",", words)
|
||||
words <- unlist(strsplit(words, ","))
|
||||
words <- tolower(words)
|
||||
return(words)
|
||||
}
|
||||
pos.words <- c(scan("data/positive-words.txt",what='character', comment.char=';', quiet=T),
|
||||
readAndflattenSentiWS("data/positive-words.txt"))
|
||||
neg.words <- c(scan("data/negative-words.txt",what='character', comment.char=';', quiet=T),
|
||||
readAndflattenSentiWS("data/negative-words.txt"))
|
||||
# Score each sentence by counting dictionary hits:
#   score = (# words found in pos.words) - (# words found in neg.words).
#
# sentences : character vector of texts to score.
# pos.words : character vector of positive sentiment words.
# neg.words : character vector of negative sentiment words.
# .progress : retained for backward compatibility with the old
#             plyr::laply interface; ignored here.
#
# Returns a data.frame with columns `score` (numeric) and `text`.
#
# Improvement: drops the plyr/stringr dependencies (which were
# loaded via `require()`, a silent-failure anti-pattern) in favour
# of base vapply() and strsplit(); the results are identical.
score.sentiment <- function(sentences, pos.words, neg.words, .progress = "none") {
  scores <- vapply(sentences, function(sentence) {
    # Clean up: drop punctuation, control characters and digits,
    # then normalise to lower case.
    sentence <- gsub("[[:punct:]]", "", sentence)
    sentence <- gsub("[[:cntrl:]]", "", sentence)
    sentence <- gsub("\\d+", "", sentence)
    sentence <- tolower(sentence)
    # Split on whitespace into individual words.
    words <- unlist(strsplit(sentence, "\\s+"))
    # %in% yields TRUE/FALSE, which sum() treats as 1/0.
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }, numeric(1), USE.NAMES = FALSE)
  data.frame(score = scores, text = sentences)
}
|
||||
score_pm_demo <- score.sentiment(pm_demo$content, pos.words, neg.words)
|
||||
score_tw_demo <- score.sentiment(tw_demo$tweet_text, pos.words, neg.words)
|
||||
ggplot(score_pm_demo) +
|
||||
geom_bar(aes(x = score), fill = "blue") +
|
||||
labs(title = "Topic: Demonstrationen", subtitle = "Sentiment-Analyse der Pressemeldungen") +
|
||||
theme_minimal()
|
||||
ggplot(score_tw_demo) +
|
||||
geom_bar(aes(x = score), fill = "blue") +
|
||||
labs(title = "Topic: Demonstrationen", subtitle = "Sentiment-Analyse der Tweets") +
|
||||
theme_minimal()
|
||||
View(score_tw_demo)
|
||||
# Fix: typo — `Ciew` is not a function; the intended call is View()
# (the corrected invocation also appears immediately afterwards).
View(score_pm_demo)
|
||||
View(score_pm_demo)
|
||||
score_pm_demo$text[3]
|
||||
knitr::opts_chunk$set(echo = TRUE)
|
||||
library(tidyverse)
|
||||
library(stringi)
|
||||
# Read in data
|
||||
pm_csv <- str_c("data/2020-12-", 1:26, "_presseportal.csv")
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-1-", 1:31, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-2-", 1:28, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-3-", 1:31, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-4-", 1:30, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-5-", 1:21, "_presseportal.csv"))
|
||||
pm_list <- lapply(pm_csv, read_csv)
|
||||
pm <- do.call(rbind, pm_list)
|
||||
summary(pm)
|
||||
tweets <- read_csv("data/copbird_table_tweet.csv")
|
||||
tweets <- tweets[tweets$created_at >= "2021-04-01", 1:4]
|
||||
usersX <- read_csv("data/copbird_table_user_ext.csv")
|
||||
# tweetXstate <- read_csv("data/copbird_table_tweet_ext_state.csv")
|
||||
# tweetXstate$stadt[tweetXstate$user_name == "Polizei Oldenburg-Stadt/Ammerl"] <- "Oldenburg"
|
||||
# tweetXstate$stadt[tweetXstate$user_name == "Polizei Mecklenburgische Seenp"] <- "Neubrandenburg"
|
||||
# tweetXstate$stadt[tweetXstate$user_name == "Polizei Wilhelmshaven/Frieslan"] <- "Wilhelmshaven"
|
||||
# tweetXstate$stadt[tweetXstate$user_name == "Bundespolizei Baden-Württember"] <- "Stuttgart"
|
||||
# tweetXstate$stadt[tweetXstate$user_name == "Landeskriminalamt Rheinland-Pf"] <- "Mainz"
|
||||
# tweetXstate$stadt[tweetXstate$user_name == "Bundespolizei Mitteldeutschlan"] <- "Pirna"
|
||||
# tweetXstate$stadt[tweetXstate$user_name == "Polizei Delmenhorst/Oldenburg-"] <- "Delmenhorst"
|
||||
# tweetXstate$stadt[tweetXstate$user_name == "Bundespolizei Flughafen Frankf"] <- "Frankfurt"
|
||||
# blaulicht <- read_csv("data/2020-12_2021-05_presseportal.csv")
|
||||
# users <- read_csv("data/copbird_table_user.csv")
|
||||
# str(users)
|
||||
# users$name <- as.factor(users$name)
|
||||
# users$handle <- as.factor(users$handle)
|
||||
pm_orte <- pm %>% group_by(bundesland) %>% count(location)
|
||||
head(pm_orte)
|
||||
head(pm_orte %>% arrange(desc(n)), n = 20)
|
||||
knitr::opts_chunk$set(echo = TRUE)
|
||||
library(tidyverse)
|
||||
library(stringi)
|
||||
pm_csv <- str_c("data/2020-12-", 1:26, "_presseportal.csv")
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-1-", 1:31, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-2-", 1:28, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-3-", 1:31, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-4-", 1:30, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-5-", 1:21, "_presseportal.csv"))
|
||||
pm_list <- lapply(pm_csv, read_csv)
|
||||
pm <- do.call(rbind, pm_list)
|
||||
tweets <- read_csv("data/copbird_table_tweet.csv")
|
||||
tweets <- tweets[tweets$created_at >= "2021-04-01", 1:4]
|
||||
usersX <- read_csv("data/copbird_table_user_ext.csv")
|
||||
tweetXstate <- read_csv("data/copbird_table_tweet_ext_state.csv")
|
||||
blaulicht <- read_csv("data/2020-12_2021-05_presseportal.csv")
|
||||
pm_demo <- read_csv("data/copbird_table_pm_topiced_demonstr.csv")
|
||||
tw_demo <- read_csv("data/copbird_table_tweet_topiced_demonstr.csv")
|
||||
pm_drogen <- read_csv("data/copbird_table_pm_topiced_drogen.csv")
|
||||
tw_drogen <- read_csv("data/copbird_table_tweet_topiced_drogen.csv")
|
||||
pm_rass <- read_csv("data/copbird_table_pm_topiced_rassis.csv")
|
||||
tw_rass <- read_csv("data/copbird_table_tweet_topiced_rassis.csv")
|
||||
head(usersX)
|
||||
head(tweetXstate[, 5:8])
|
||||
blaulicht$tw_user_id <- as.character(blaulicht$tw_user_id)
|
||||
head(blaulicht[, -c(2, 5)])
|
||||
land_tw <- full_join(tweets, usersX[c(1, 4)], by = "user_id")
|
||||
land_tw$bundesland[land_tw$bundesland == "-"] <- NA_character_
|
||||
land_tw <- land_tw %>% group_by(bundesland) %>% count()
|
||||
land_tw$bundesland <- as.factor(land_tw$bundesland)
|
||||
land_pm <- pm %>% group_by(bundesland) %>% count()
|
||||
land_pm$bundesland[land_pm$bundesland == "berlin-brandenburg"] <- "berlin"
|
||||
land_pm$bundesland <- stri_trans_totitle(land_pm$bundesland)
|
||||
land_pm$bundesland <- gsub("ue", "ü", land_pm$bundesland)
|
||||
land_pm$bundesland <- factor(land_pm$bundesland, levels = levels(land_tw$bundesland))
|
||||
land_pm_tw <- full_join(land_pm, land_tw, by = "bundesland")
|
||||
names(land_pm_tw)[2:3] <- c("Pressemeldung", "Twitter")
|
||||
land_pm_tw <- land_pm_tw[-which(is.na(land_pm_tw$bundesland)), ]
|
||||
land_pm_tw$Pressemeldung[which(is.na(land_pm_tw$Pressemeldung))] <- 0
|
||||
land_pm_tw <- gather(land_pm_tw, key = "Plattform", value = "count", -bundesland)
|
||||
ggplot(land_pm_tw) +
|
||||
geom_col(aes(x = bundesland, y = count, fill = Plattform)) +
|
||||
scale_fill_manual(values = c("#CC6699", "#0099CC")) +
|
||||
facet_wrap(~Plattform) +
|
||||
coord_flip() +
|
||||
guides(fill = FALSE) +
|
||||
labs(title = "Anzahl der Pressemeldungen und Tweets",
|
||||
subtitle = "Im Zeitraum April bis Mai 2021") +
|
||||
theme_minimal()
|
||||
ggplot(land_pm_tw) +
|
||||
geom_col(aes(x = bundesland, y = count, fill = Plattform), position = "fill") +
|
||||
scale_fill_manual(values = c("#CC6699", "#0099CC")) +
|
||||
coord_flip() +
|
||||
labs(title = "Anzahl der Pressemeldungen und Tweets",
|
||||
subtitle = "Im Zeitraum April bis Mai 2021") +
|
||||
theme_minimal()
|
||||
readAndflattenSentiWS <- function(filename) {
|
||||
words = readLines(filename, encoding="UTF-8")
|
||||
words <- sub("\\|[A-Z]+\t[0-9.-]+\t?", ",", words)
|
||||
words <- unlist(strsplit(words, ","))
|
||||
words <- tolower(words)
|
||||
return(words)
|
||||
}
|
||||
pos.words <- c(scan("data/positive-words.txt",what='character', comment.char=';', quiet=T),
|
||||
readAndflattenSentiWS("data/positive-words.txt"))
|
||||
neg.words <- c(scan("data/negative-words.txt",what='character', comment.char=';', quiet=T),
|
||||
readAndflattenSentiWS("data/negative-words.txt"))
|
||||
# Score each sentence by counting dictionary hits:
#   score = (# words found in pos.words) - (# words found in neg.words).
#
# sentences : character vector of texts to score.
# pos.words : character vector of positive sentiment words.
# neg.words : character vector of negative sentiment words.
# .progress : retained for backward compatibility with the old
#             plyr::laply interface; ignored here.
#
# Returns a data.frame with columns `score` (numeric) and `text`.
#
# Improvement: drops the plyr/stringr dependencies (which were
# loaded via `require()`, a silent-failure anti-pattern) in favour
# of base vapply() and strsplit(); the results are identical.
score.sentiment <- function(sentences, pos.words, neg.words, .progress = "none") {
  scores <- vapply(sentences, function(sentence) {
    # Clean up: drop punctuation, control characters and digits,
    # then normalise to lower case.
    sentence <- gsub("[[:punct:]]", "", sentence)
    sentence <- gsub("[[:cntrl:]]", "", sentence)
    sentence <- gsub("\\d+", "", sentence)
    sentence <- tolower(sentence)
    # Split on whitespace into individual words.
    words <- unlist(strsplit(sentence, "\\s+"))
    # %in% yields TRUE/FALSE, which sum() treats as 1/0.
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }, numeric(1), USE.NAMES = FALSE)
  data.frame(score = scores, text = sentences)
}
|
||||
score_pm_demo <- score.sentiment(pm_demo$content, pos.words, neg.words)
|
||||
score_tw_demo <- score.sentiment(tw_demo$tweet_text, pos.words, neg.words)
|
||||
ggplot(score_pm_demo) +
|
||||
geom_bar(aes(x = score), fill = "blue") +
|
||||
labs(title = "Topic: Demonstrationen", subtitle = "Sentiment-Analyse der Pressemeldungen") +
|
||||
theme_minimal()
|
||||
ggplot(score_tw_demo) +
|
||||
geom_bar(aes(x = score), fill = "blue") +
|
||||
labs(title = "Topic: Demonstrationen", subtitle = "Sentiment-Analyse der Tweets") +
|
||||
theme_minimal()
|
||||
# Fix: copy-paste bug — the "Drogen" scores were computed from the
# Demonstrationen data (pm_demo / tw_demo), so the Drogen plots
# silently showed the wrong topic. Use the drogen data sets.
score_pm_drogen <- score.sentiment(pm_drogen$content, pos.words, neg.words)
score_tw_drogen <- score.sentiment(tw_drogen$tweet_text, pos.words, neg.words)
|
||||
ggplot(score_pm_drogen) +
|
||||
geom_bar(aes(x = score), fill = "darkgreen") +
|
||||
labs(title = "Topic: Drogen", subtitle = "Sentiment-Analyse der Pressemeldungen") +
|
||||
theme_minimal()
|
||||
ggplot(score_tw_drogen) +
|
||||
geom_bar(aes(x = score), fill = "darkgreen") +
|
||||
labs(title = "Topic: Drogen", subtitle = "Sentiment-Analyse der Tweets") +
|
||||
theme_minimal()
|
||||
score_pm_rass <- score.sentiment(pm_rass$content, pos.words, neg.words)
|
||||
score_tw_rass <- score.sentiment(tw_rass$tweet_text, pos.words, neg.words)
|
||||
ggplot(score_pm_rass) +
|
||||
geom_bar(aes(x = score), fill = "purple") +
|
||||
labs(title = "Topic: Rassismus", subtitle = "Sentiment-Analyse der Pressemeldungen") +
|
||||
theme_minimal()
|
||||
ggplot(score_tw_rass) +
|
||||
geom_bar(aes(x = score), fill = "purple") +
|
||||
labs(title = "Topic: Rassismus", subtitle = "Sentiment-Analyse der Tweets") +
|
||||
theme_minimal()
|
||||
|
|
@ -0,0 +1,216 @@
|
|||
---
|
||||
title: "Team 16"
|
||||
author: "Christian, Simon und Cuca"
|
||||
date: "23 5 2021"
|
||||
output: pdf_document
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = TRUE)
|
||||
```
|
||||
|
||||
# Daten einlesen
|
||||
```{r, message = FALSE}
|
||||
library(tidyverse)
|
||||
library(stringi)
|
||||
|
||||
pm_csv <- str_c("data/2020-12-", 1:26, "_presseportal.csv")
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-1-", 1:31, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-2-", 1:28, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-3-", 1:31, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-4-", 1:30, "_presseportal.csv"))
|
||||
pm_csv <- c(pm_csv, str_c("data/2021-5-", 1:21, "_presseportal.csv"))
|
||||
pm_list <- lapply(pm_csv, read_csv)
|
||||
pm <- do.call(rbind, pm_list)
|
||||
|
||||
tweets <- read_csv("data/copbird_table_tweet.csv")
|
||||
tweets <- tweets[tweets$created_at >= "2021-04-01", 1:4]
|
||||
usersX <- read_csv("data/copbird_table_user_ext.csv")
|
||||
tweetXstate <- read_csv("data/copbird_table_tweet_ext_state.csv")
|
||||
blaulicht <- read_csv("data/2020-12_2021-05_presseportal.csv")
|
||||
|
||||
pm_demo <- read_csv("data/copbird_table_pm_topiced_demonstr.csv")
|
||||
tw_demo <- read_csv("data/copbird_table_tweet_topiced_demonstr.csv")
|
||||
|
||||
pm_drogen <- read_csv("data/copbird_table_pm_topiced_drogen.csv")
|
||||
tw_drogen <- read_csv("data/copbird_table_tweet_topiced_drogen.csv")
|
||||
|
||||
pm_rass <- read_csv("data/copbird_table_pm_topiced_rassis.csv")
|
||||
tw_rass <- read_csv("data/copbird_table_tweet_topiced_rassis.csv")
|
||||
```
|
||||
|
||||
|
||||
# Scrapen der Pressemeldungen (seit Dezember 2020)
|
||||
|
||||
# Zuordnung von Orten der Pressemeldungen und Tweets
|
||||
```{r}
|
||||
head(usersX)
|
||||
head(tweetXstate[, 5:8])
|
||||
blaulicht$tw_user_id <- as.character(blaulicht$tw_user_id)
|
||||
head(blaulicht[, -c(2, 5)])
|
||||
```
|
||||
|
||||
# Anzahl Pressemeldungen vs. Tweets
|
||||
```{r}
|
||||
land_tw <- full_join(tweets, usersX[c(1, 4)], by = "user_id")
|
||||
land_tw$bundesland[land_tw$bundesland == "-"] <- NA_character_
|
||||
land_tw <- land_tw %>% group_by(bundesland) %>% count()
|
||||
land_tw$bundesland <- as.factor(land_tw$bundesland)
|
||||
|
||||
land_pm <- pm %>% group_by(bundesland) %>% count()
|
||||
land_pm$bundesland[land_pm$bundesland == "berlin-brandenburg"] <- "berlin"
|
||||
land_pm$bundesland <- stri_trans_totitle(land_pm$bundesland)
|
||||
land_pm$bundesland <- gsub("ue", "ü", land_pm$bundesland)
|
||||
land_pm$bundesland <- factor(land_pm$bundesland, levels = levels(land_tw$bundesland))
|
||||
|
||||
land_pm_tw <- full_join(land_pm, land_tw, by = "bundesland")
|
||||
names(land_pm_tw)[2:3] <- c("Pressemeldung", "Twitter")
|
||||
land_pm_tw <- land_pm_tw[-which(is.na(land_pm_tw$bundesland)), ]
|
||||
land_pm_tw$Pressemeldung[which(is.na(land_pm_tw$Pressemeldung))] <- 0
|
||||
land_pm_tw <- gather(land_pm_tw, key = "Plattform", value = "count", -bundesland)
|
||||
|
||||
ggplot(land_pm_tw) +
|
||||
geom_col(aes(x = bundesland, y = count, fill = Plattform)) +
|
||||
scale_fill_manual(values = c("#CC6699", "#0099CC")) +
|
||||
facet_wrap(~Plattform) +
|
||||
coord_flip() +
|
||||
guides(fill = FALSE) +
|
||||
labs(title = "Anzahl der Pressemeldungen und Tweets",
|
||||
subtitle = "Im Zeitraum April bis Mai 2021") +
|
||||
theme_minimal()
|
||||
|
||||
ggplot(land_pm_tw) +
|
||||
geom_col(aes(x = bundesland, y = count, fill = Plattform), position = "fill") +
|
||||
scale_fill_manual(values = c("#CC6699", "#0099CC")) +
|
||||
coord_flip() +
|
||||
labs(title = "Anzahl der Pressemeldungen und Tweets",
|
||||
subtitle = "Im Zeitraum April bis Mai 2021") +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
# Topic modelling
|
||||
```{r, message=FALSE}
|
||||
# library(quanteda)
|
||||
# library(tidyverse)
|
||||
# library(topicmodels)
|
||||
# library(ldatuning)
|
||||
# library(stm)
|
||||
# library(wordcloud)
|
||||
#
|
||||
# pm <- pm[!is.na(pm$content), ]
|
||||
# tok <- tokens(pm$content_ber_satzzeichen)
|
||||
# mydfm <- dfm(tok, remove_numbers = TRUE, remove_punct = TRUE, remove_symbols = TRUE, remove = stopwords("german"))
|
||||
# mydfm.trim <- dfm_trim(mydfm, min_docfreq = 3, max_docfreq = 65)
|
||||
# # mydfm.trim
|
||||
#
|
||||
# anzahl.themen <- 10
|
||||
# anzahl.woerter <- 10
|
||||
# dfm2topicmodels <- convert(mydfm.trim, to = "topicmodels")
|
||||
# lda.modell <- LDA(dfm2topicmodels, anzahl.themen)
|
||||
# lda.modell
|
||||
# topmod <- as.data.frame(terms(lda.modell, anzahl.woerter))
|
||||
# topmod
|
||||
#
|
||||
# write_csv(topmod, "data/topicmodel.csv")
|
||||
```
|
||||
|
||||
### Auswahl der Keywords
|
||||
`topic_1 = ['demonstr', 'kundgeb']`
|
||||
|
||||
`topic_2 = ['drogen', 'weed', 'graas', 'lsd', 'cannabis', 'ecstasy', 'kokain', 'meth', 'crystal']`
|
||||
|
||||
`topic_3 = ['rassis', 'diskriminier', 'ausländerfeindlich', 'fremdenfeindlich', 'fremdenhass']`
|
||||
|
||||
`topic_4 = ['antisem', 'juden', 'synagoge', 'judenhass', 'judenfeindlich', 'holocaust']`
|
||||
|
||||
|
||||
|
||||
# Sentiment Analyse
|
||||
```{r}
|
||||
readAndflattenSentiWS <- function(filename) {
|
||||
words = readLines(filename, encoding="UTF-8")
|
||||
words <- sub("\\|[A-Z]+\t[0-9.-]+\t?", ",", words)
|
||||
words <- unlist(strsplit(words, ","))
|
||||
words <- tolower(words)
|
||||
return(words)
|
||||
}
|
||||
|
||||
pos.words <- c(scan("SentiWS/positive-words.txt",what='character', comment.char=';', quiet=T),
|
||||
readAndflattenSentiWS("SentiWS/positive-words.txt"))
|
||||
neg.words <- c(scan("SentiWS/negative-words.txt",what='character', comment.char=';', quiet=T),
|
||||
readAndflattenSentiWS("SentiWS/negative-words.txt"))
|
||||
|
||||
# Score each sentence by counting dictionary hits:
#   score = (# words found in pos.words) - (# words found in neg.words).
#
# sentences : character vector of texts to score.
# pos.words : character vector of positive sentiment words.
# neg.words : character vector of negative sentiment words.
# .progress : retained for backward compatibility with the old
#             plyr::laply interface; ignored here.
#
# Returns a data.frame with columns `score` (numeric) and `text`.
#
# Improvement: drops the plyr/stringr dependencies (which were
# loaded via `require()`, a silent-failure anti-pattern) in favour
# of base vapply() and strsplit(); the results are identical.
score.sentiment <- function(sentences, pos.words, neg.words, .progress = "none") {
  scores <- vapply(sentences, function(sentence) {
    # Clean up: drop punctuation, control characters and digits,
    # then normalise to lower case.
    sentence <- gsub("[[:punct:]]", "", sentence)
    sentence <- gsub("[[:cntrl:]]", "", sentence)
    sentence <- gsub("\\d+", "", sentence)
    sentence <- tolower(sentence)
    # Split on whitespace into individual words.
    words <- unlist(strsplit(sentence, "\\s+"))
    # %in% yields TRUE/FALSE, which sum() treats as 1/0.
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }, numeric(1), USE.NAMES = FALSE)
  data.frame(score = scores, text = sentences)
}
|
||||
|
||||
score_pm_demo <- score.sentiment(pm_demo$content, pos.words, neg.words)
|
||||
score_tw_demo <- score.sentiment(tw_demo$tweet_text, pos.words, neg.words)
|
||||
|
||||
ggplot(score_pm_demo) +
|
||||
geom_bar(aes(x = score), fill = "blue") +
|
||||
labs(title = "Topic: Demonstrationen", subtitle = "Sentiment-Analyse der Pressemeldungen") +
|
||||
theme_minimal()
|
||||
|
||||
ggplot(score_tw_demo) +
|
||||
geom_bar(aes(x = score), fill = "blue") +
|
||||
labs(title = "Topic: Demonstrationen", subtitle = "Sentiment-Analyse der Tweets") +
|
||||
theme_minimal()
|
||||
|
||||
# Fix: copy-paste bug — the "Drogen" scores were computed from the
# Demonstrationen data (pm_demo / tw_demo), so the Drogen plots
# silently showed the wrong topic. Use the drogen data sets.
score_pm_drogen <- score.sentiment(pm_drogen$content, pos.words, neg.words)
score_tw_drogen <- score.sentiment(tw_drogen$tweet_text, pos.words, neg.words)
|
||||
|
||||
ggplot(score_pm_drogen) +
|
||||
geom_bar(aes(x = score), fill = "darkgreen") +
|
||||
labs(title = "Topic: Drogen", subtitle = "Sentiment-Analyse der Pressemeldungen") +
|
||||
theme_minimal()
|
||||
|
||||
ggplot(score_tw_drogen) +
|
||||
geom_bar(aes(x = score), fill = "darkgreen") +
|
||||
labs(title = "Topic: Drogen", subtitle = "Sentiment-Analyse der Tweets") +
|
||||
theme_minimal()
|
||||
|
||||
score_pm_rass <- score.sentiment(pm_rass$content, pos.words, neg.words)
|
||||
score_tw_rass <- score.sentiment(tw_rass$tweet_text, pos.words, neg.words)
|
||||
|
||||
ggplot(score_pm_rass) +
|
||||
geom_bar(aes(x = score), fill = "purple") +
|
||||
labs(title = "Topic: Rassismus", subtitle = "Sentiment-Analyse der Pressemeldungen") +
|
||||
theme_minimal()
|
||||
|
||||
ggplot(score_tw_rass) +
|
||||
geom_bar(aes(x = score), fill = "purple") +
|
||||
labs(title = "Topic: Rassismus", subtitle = "Sentiment-Analyse der Tweets") +
|
||||
theme_minimal()
|
||||
```
|
||||
|
||||
```{r}
|
||||
sessionInfo()
|
||||
```
|
||||
|
|
@ -0,0 +1,216 @@
|
|||
---
|
||||
title: "Team 16"
|
||||
author: "Christian, Simon und Cuca"
|
||||
date: "23 5 2021"
|
||||
output: pdf_document
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = TRUE)
|
||||
```
|
||||
|
||||
# Daten einlesen
|
||||
```{r, message = FALSE}
library(tidyverse)
library(stringi)

# Daily press-release exports, December 2020 through 21 May 2021.
# Build "data/<month-prefix><day>_presseportal.csv" for every covered day.
month_prefixes <- c("2020-12-", "2021-1-", "2021-2-", "2021-3-", "2021-4-", "2021-5-")
month_days     <- c(26, 31, 28, 31, 30, 21)
pm_csv <- unlist(
  Map(function(prefix, days) str_c("data/", prefix, seq_len(days), "_presseportal.csv"),
      month_prefixes, month_days),
  use.names = FALSE
)
pm_list <- lapply(pm_csv, read_csv)
pm <- do.call(rbind, pm_list)

# Tweets from 2021-04-01 on; keep only the first four columns.
tweets <- read_csv("data/copbird_table_tweet.csv")
tweets <- tweets[tweets$created_at >= "2021-04-01", 1:4]
usersX <- read_csv("data/copbird_table_user_ext.csv")
tweetXstate <- read_csv("data/copbird_table_tweet_ext_state.csv")
blaulicht <- read_csv("data/2020-12_2021-05_presseportal.csv")

# Press releases and tweets pre-filtered by topic keyword lists.
pm_demo <- read_csv("data/copbird_table_pm_topiced_demonstr.csv")
tw_demo <- read_csv("data/copbird_table_tweet_topiced_demonstr.csv")

pm_drogen <- read_csv("data/copbird_table_pm_topiced_drogen.csv")
tw_drogen <- read_csv("data/copbird_table_tweet_topiced_drogen.csv")

pm_rass <- read_csv("data/copbird_table_pm_topiced_rassis.csv")
tw_rass <- read_csv("data/copbird_table_tweet_topiced_rassis.csv")
```
|
||||
|
||||
|
||||
# Scrapen der Pressemeldungen (seit Dezember 2020)
|
||||
|
||||
# Zuordnung von Orten der Pressemeldungen und Tweets
|
||||
```{r}
# Quick inspection of the tables used to link press releases and tweets
# to locations.
head(usersX)
head(tweetXstate[, 5:8])
# Make the user id character so it can be compared/joined as text
# against the Twitter user ids.
blaulicht$tw_user_id <- as.character(blaulicht$tw_user_id)
head(blaulicht[, -c(2, 5)])
```
|
||||
|
||||
# Anzahl Pressemeldungen vs. Tweets
|
||||
```{r}
# Number of tweets per federal state (from the extended user table).
land_tw <- full_join(tweets, usersX[c(1, 4)], by = "user_id")
land_tw$bundesland[land_tw$bundesland == "-"] <- NA_character_
land_tw <- land_tw %>% group_by(bundesland) %>% count()
land_tw$bundesland <- as.factor(land_tw$bundesland)

# Number of press releases per state, harmonised to the Twitter spelling
# ("berlin-brandenburg" folded into "berlin", title case, "ue" -> "ü").
land_pm <- pm %>% group_by(bundesland) %>% count()
land_pm$bundesland[land_pm$bundesland == "berlin-brandenburg"] <- "berlin"
land_pm$bundesland <- stri_trans_totitle(land_pm$bundesland)
land_pm$bundesland <- gsub("ue", "ü", land_pm$bundesland)
land_pm$bundesland <- factor(land_pm$bundesland, levels = levels(land_tw$bundesland))

land_pm_tw <- full_join(land_pm, land_tw, by = "bundesland")
names(land_pm_tw)[2:3] <- c("Pressemeldung", "Twitter")
# BUG FIX: the original used land_pm_tw[-which(is.na(...)), ], which drops
# *every* row when there is no NA (negative indexing with integer(0) selects
# nothing). Filter with a logical mask instead.
land_pm_tw <- land_pm_tw[!is.na(land_pm_tw$bundesland), ]
land_pm_tw$Pressemeldung[is.na(land_pm_tw$Pressemeldung)] <- 0
# pivot_longer() replaces the superseded gather().
land_pm_tw <- pivot_longer(land_pm_tw, cols = -bundesland,
                           names_to = "Plattform", values_to = "count")

# Absolute counts, one panel per platform.
ggplot(land_pm_tw) +
  geom_col(aes(x = bundesland, y = count, fill = Plattform)) +
  scale_fill_manual(values = c("#CC6699", "#0099CC")) +
  facet_wrap(~Plattform) +
  coord_flip() +
  guides(fill = "none") +  # "none" replaces the deprecated fill = FALSE
  labs(title = "Anzahl der Pressemeldungen und Tweets",
       subtitle = "Im Zeitraum April bis Mai 2021") +
  theme_minimal()

# Relative share of press releases vs. tweets per state.
ggplot(land_pm_tw) +
  geom_col(aes(x = bundesland, y = count, fill = Plattform), position = "fill") +
  scale_fill_manual(values = c("#CC6699", "#0099CC")) +
  coord_flip() +
  labs(title = "Anzahl der Pressemeldungen und Tweets",
       subtitle = "Im Zeitraum April bis Mai 2021") +
  theme_minimal()
```
|
||||
|
||||
# Topic modelling
|
||||
```{r, message=FALSE}
|
||||
# library(quanteda)
|
||||
# library(tidyverse)
|
||||
# library(topicmodels)
|
||||
# library(ldatuning)
|
||||
# library(stm)
|
||||
# library(wordcloud)
|
||||
#
|
||||
# pm <- pm[!is.na(pm$content), ]
|
||||
# tok <- tokens(pm$content_ber_satzzeichen)
|
||||
# mydfm <- dfm(tok, remove_numbers = TRUE, remove_punct = TRUE, remove_symbols = TRUE, remove = stopwords("german"))
|
||||
# mydfm.trim <- dfm_trim(mydfm, min_docfreq = 3, max_docfreq = 65)
|
||||
# # mydfm.trim
|
||||
#
|
||||
# anzahl.themen <- 10
|
||||
# anzahl.woerter <- 10
|
||||
# dfm2topicmodels <- convert(mydfm.trim, to = "topicmodels")
|
||||
# lda.modell <- LDA(dfm2topicmodels, anzahl.themen)
|
||||
# lda.modell
|
||||
# topmod <- as.data.frame(terms(lda.modell, anzahl.woerter))
|
||||
# topmod
|
||||
#
|
||||
# write_csv(topmod, "data/topicmodel.csv")
|
||||
```
|
||||
|
||||
### Auswahl der Keywords
|
||||
`topic_1 = ['demonstr', 'kundgeb']`
|
||||
|
||||
`topic_2 = ['drogen', 'weed', 'graas', 'lsd', 'cannabis', 'ecstasy', 'kokain', 'meth', 'crystal']`
|
||||
|
||||
`topic_3 = ['rassis', 'diskriminier', 'ausländerfeindlich', 'fremdenfeindlich', 'fremdenhass']`
|
||||
|
||||
`topic_4 = ['antisem', 'juden', 'synagoge', 'judenhass', 'judenfeindlich', 'holocaust']`
|
||||
|
||||
|
||||
|
||||
# Sentiment Analyse
|
||||
```{r}
|
||||
# Read a SentiWS dictionary file and flatten it into a plain character vector.
# Each line of the form "word|POS<TAB>weight<TAB>infl1,infl2,..." becomes the
# base word plus all inflected forms, lower-cased.
readAndflattenSentiWS <- function(filename) {
  raw_lines <- readLines(filename, encoding = "UTF-8")
  # Replace the "|POS<TAB>weight<TAB>" part with a comma so the base word and
  # its inflections form one comma-separated list.
  flattened <- sub("\\|[A-Z]+\t[0-9.-]+\t?", ",", raw_lines)
  tolower(unlist(strsplit(flattened, ",")))
}
|
||||
|
||||
# Sentiment dictionaries: the plain word list read by scan() combined with
# the flattened SentiWS entries (base forms plus inflections).
pos.words <- c(
  scan("SentiWS/positive-words.txt", what = "character",
       comment.char = ";", quiet = TRUE),
  readAndflattenSentiWS("SentiWS/positive-words.txt")
)
neg.words <- c(
  scan("SentiWS/negative-words.txt", what = "character",
       comment.char = ";", quiet = TRUE),
  readAndflattenSentiWS("SentiWS/negative-words.txt")
)
|
||||
|
||||
# Dictionary-based sentiment score: for every sentence, the number of positive
# dictionary words minus the number of negative dictionary words.
#
# Args:
#   sentences: character vector of texts to score.
#   pos.words: character vector of positive words (lower case).
#   neg.words: character vector of negative words (lower case).
#   .progress: kept for backward compatibility with the old plyr-based
#              implementation; no longer used.
#
# Returns: data.frame with columns `score` (integer) and `text` (the input).
#
# IMPROVEMENT: the original loaded plyr/stringr via require() inside the
# function (an anti-pattern) only to use laply()/str_split(); base vapply()
# and strsplit() do the same job with no extra dependencies.
score.sentiment <- function(sentences, pos.words, neg.words, .progress = 'none') {
  score_one <- function(sentence) {
    # Strip punctuation, control characters and digits, then lower-case.
    sentence <- gsub('[[:punct:]]', '', sentence)
    sentence <- gsub('[[:cntrl:]]', '', sentence)
    sentence <- gsub('\\d+', '', sentence)
    sentence <- tolower(sentence)
    # Tokenize on whitespace.
    words <- unlist(strsplit(sentence, '\\s+'))
    # match() returns NA for words not in the dictionary; count the hits.
    pos.count <- sum(!is.na(match(words, pos.words)))
    neg.count <- sum(!is.na(match(words, neg.words)))
    pos.count - neg.count
  }
  scores <- vapply(sentences, score_one, integer(1), USE.NAMES = FALSE)
  data.frame(score = scores, text = sentences)
}
|
||||
|
||||
# Sentiment scores per topic, for press releases (pm) and tweets (tw).
score_pm_demo <- score.sentiment(pm_demo$content, pos.words, neg.words)
score_tw_demo <- score.sentiment(tw_demo$tweet_text, pos.words, neg.words)

# BUG FIX: the drug-topic scores were computed from the *demo* data frames
# (pm_demo / tw_demo); they must use pm_drogen / tw_drogen.
score_pm_drogen <- score.sentiment(pm_drogen$content, pos.words, neg.words)
score_tw_drogen <- score.sentiment(tw_drogen$tweet_text, pos.words, neg.words)

score_pm_rass <- score.sentiment(pm_rass$content, pos.words, neg.words)
score_tw_rass <- score.sentiment(tw_rass$tweet_text, pos.words, neg.words)

# Helper: histogram of sentiment scores for one topic/source combination.
# Replaces six copies of the same ggplot block.
plot_sentiment <- function(scores, topic, source, fill) {
  ggplot(scores) +
    geom_bar(aes(x = score), fill = fill) +
    labs(title = str_c("Topic: ", topic),
         subtitle = str_c("Sentiment-Analyse der ", source)) +
    theme_minimal()
}

plot_sentiment(score_pm_demo,   "Demonstrationen", "Pressemeldungen", "blue")
plot_sentiment(score_tw_demo,   "Demonstrationen", "Tweets",          "blue")
plot_sentiment(score_pm_drogen, "Drogen",          "Pressemeldungen", "darkgreen")
plot_sentiment(score_tw_drogen, "Drogen",          "Tweets",          "darkgreen")
plot_sentiment(score_pm_rass,   "Rassismus",       "Pressemeldungen", "purple")
plot_sentiment(score_tw_rass,   "Rassismus",       "Tweets",          "purple")
|
||||
```
|
||||
|
||||
```{r}
|
||||
sessionInfo()
|
||||
```
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
Version: 1.0
|
||||
|
||||
RestoreWorkspace: Default
|
||||
SaveWorkspace: Default
|
||||
AlwaysSaveHistory: Default
|
||||
|
||||
EnableCodeIndexing: Yes
|
||||
UseSpacesForTab: Yes
|
||||
NumSpacesForTab: 2
|
||||
Encoding: UTF-8
|
||||
|
||||
RnwWeave: Sweave
|
||||
LaTeX: pdfLaTeX
|
||||
Loading…
Add table
Add a link
Reference in a new issue