mend
This commit is contained in:
parent
fda8ba9362
commit
607e0ffdc8
4 changed files with 9843 additions and 158 deletions
|
|
@ -1,5 +1,6 @@
|
|||
import pandas as pd
|
||||
import re
|
||||
import numpy as np
|
||||
|
||||
tweets_path = "../raw_data/tweets.csv"
|
||||
accounts_path = "../raw_data/accounts.txt"
|
||||
|
|
@ -32,5 +33,19 @@ mention_counts = tweets[["handle", "mentions"]].value_counts().reset_index()
|
|||
cooc_matrix = pd.DataFrame(mention_counts).pivot(index="handle", columns="mentions",
|
||||
values=0).reset_index()
|
||||
|
||||
cooc_matrix.set_index("handle",inplace=True)
|
||||
|
||||
# networkx wants the matrix to be square, so make sure it is square
|
||||
# add columns
|
||||
for r in cooc_matrix.index:
|
||||
if r not in cooc_matrix.columns:
|
||||
print(f"{r} is not in colums")
|
||||
cooc_matrix[r] = None
|
||||
|
||||
not_in_index = list(filter(lambda x: x not in cooc_matrix.index, cooc_matrix.columns))
|
||||
new_rows = pd.DataFrame(index=not_in_index)
|
||||
cooc_matrix = pd.concat([cooc_matrix, new_rows])
|
||||
|
||||
|
||||
# save co-occurrence matrix
|
||||
cooc_matrix.to_csv(output_path, sep=";", index=False)
|
||||
cooc_matrix.to_csv(output_path, sep=";")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue