From 19ceaca42b7fc91c1b9bd275bec52cad71d3da05 Mon Sep 17 00:00:00 2001 From: lukaszett Date: Mon, 25 Jul 2022 22:29:53 +0200 Subject: [PATCH 1/2] add base sourcecode --- src/create_dataset.py | 11 +++++++++++ src/create_id_list.py | 8 ++++++++ 2 files changed, 19 insertions(+) create mode 100644 src/create_dataset.py create mode 100644 src/create_id_list.py diff --git a/src/create_dataset.py b/src/create_dataset.py new file mode 100644 index 0000000..2c90e8b --- /dev/null +++ b/src/create_dataset.py @@ -0,0 +1,11 @@ +import pandas as pd + +tweets_path = "../raw_data/tweets.csv" + +tweets = pd.read_csv(tweets_path) + +# TODO extract mentions from tweets + +# TODO generate co-occurrence matrix + +# TODO save co-occurrence matrix diff --git a/src/create_id_list.py b/src/create_id_list.py new file mode 100644 index 0000000..5e9bcd7 --- /dev/null +++ b/src/create_id_list.py @@ -0,0 +1,8 @@ +import pandas as pd + +tweets_path = "../raw_data/tweets.csv" + +tweets = pd.read_csv(tweets_path) + +for id in tweets.id: + print(id) From 2e38ea848ec6bb47869fc89ca3bae339016941db Mon Sep 17 00:00:00 2001 From: lukaszett Date: Mon, 25 Jul 2022 22:31:10 +0200 Subject: [PATCH 2/2] readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 59646cd..dd6d08f 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,3 @@ # copbird-sna +This project uses pandas - either run `pip install pandas` to install the needed package or run these files in a conda environment.