This commit is contained in:
Lukas Z 2022-07-28 17:24:07 +02:00
parent fda8ba9362
commit 607e0ffdc8
4 changed files with 9843 additions and 158 deletions

View file

@ -1,5 +1,6 @@
import pandas as pd
import re
import numpy as np
# Input file locations, given as relative paths into the raw_data directory.
tweets_path = "../raw_data/tweets.csv"
accounts_path = "../raw_data/accounts.txt"
@ -32,5 +33,19 @@ mention_counts = tweets[["handle", "mentions"]].value_counts().reset_index()
# Build the handle x mentions co-occurrence matrix from the long-format counts.
# The count column is the last column of mention_counts; its label depends on
# the pandas version (0 before pandas 2.0, "count" afterwards), so select it by
# position instead of hard-coding the label.
count_column = mention_counts.columns[-1]
# pivot() already uses "handle" as the row index, so no reset_index/set_index
# round trip is needed afterwards.
cooc_matrix = pd.DataFrame(mention_counts).pivot(
    index="handle",
    columns="mentions",
    values=count_column,
)
# networkx wants the matrix to be square, so make sure it is square.
# Every label has to appear both as a row and as a column, and both axes must
# list the labels in the same order so that cell (i, j) lines up with its
# labels when the matrix is read as an adjacency matrix.
missing_columns = [h for h in cooc_matrix.index if h not in cooc_matrix.columns]
for handle in missing_columns:
    # Report handles that never occur as a mention target.
    print(f"{handle} is not in colums")
# Reindex both axes over the union of all labels in a single step; cells that
# did not exist before are filled with NaN (written as empty fields by to_csv).
all_handles = cooc_matrix.index.union(cooc_matrix.columns)
cooc_matrix = cooc_matrix.reindex(index=all_handles, columns=all_handles)
# Save the co-occurrence matrix as a ";"-separated CSV. The index is written
# too, because it holds the row labels (handles); a preceding write with
# index=False to the same path was overwritten immediately and is dropped.
cooc_matrix.to_csv(output_path, sep=";")