diff --git a/src/create_dataset.py b/src/create_dataset.py
new file mode 100644
index 0000000..2c90e8b
--- /dev/null
+++ b/src/create_dataset.py
@@ -0,0 +1,14 @@
+"""Load the raw tweets CSV; mention extraction and the co-occurrence matrix are still TODO."""
+
+import pandas as pd
+
+# Relative path: resolved against the current working directory, not this file.
+tweets_path = "../raw_data/tweets.csv"
+
+tweets = pd.read_csv(tweets_path)
+
+# TODO extract mentions from tweets
+
+# TODO generate co-occurrence matrix
+
+# TODO save co-occurrence matrix
diff --git a/src/create_id_list.py b/src/create_id_list.py
new file mode 100644
index 0000000..5e9bcd7
--- /dev/null
+++ b/src/create_id_list.py
@@ -0,0 +1,12 @@
+"""Print every value of the ``id`` column of the raw tweets CSV, one per line."""
+
+import pandas as pd
+
+# Relative path: resolved against the current working directory, not this file.
+tweets_path = "../raw_data/tweets.csv"
+
+tweets = pd.read_csv(tweets_path)
+
+# Loop variable is tweet_id (not id) so the builtin id() is not shadowed.
+for tweet_id in tweets["id"]:
+    print(tweet_id)