From 19ceaca42b7fc91c1b9bd275bec52cad71d3da05 Mon Sep 17 00:00:00 2001
From: lukaszett
Date: Mon, 25 Jul 2022 22:29:53 +0200
Subject: [PATCH] add base sourcecode

---
Review note: the file contents below were edited after export, so the blob
ids on the `index` lines are stale.

 src/create_dataset.py | 13 +++++++++++++
 src/create_id_list.py | 10 ++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 src/create_dataset.py
 create mode 100644 src/create_id_list.py

diff --git a/src/create_dataset.py b/src/create_dataset.py
new file mode 100644
index 0000000..2c90e8b
--- /dev/null
+++ b/src/create_dataset.py
@@ -0,0 +1,13 @@
+"""Load the raw tweets CSV; the mention co-occurrence steps are still TODO."""
+
+import pandas as pd
+
+tweets_path = "../raw_data/tweets.csv"
+
+tweets = pd.read_csv(tweets_path)
+
+# TODO extract mentions from tweets
+
+# TODO generate co-occurrence matrix
+
+# TODO save co-occurrence matrix
diff --git a/src/create_id_list.py b/src/create_id_list.py
new file mode 100644
index 0000000..5e9bcd7
--- /dev/null
+++ b/src/create_id_list.py
@@ -0,0 +1,10 @@
+"""Print the `id` column of the raw tweets CSV, one value per line."""
+
+import pandas as pd
+
+tweets_path = "../raw_data/tweets.csv"
+
+tweets = pd.read_csv(tweets_path)
+
+for tweet_id in tweets["id"]:
+    print(tweet_id)