diff --git a/README.md b/README.md index 59646cd..dd6d08f 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,3 @@ # copbird-sna +This project uses pandas - either run `pip install pandas` to install the needed package or run these files in a conda environment. diff --git a/src/create_dataset.py b/src/create_dataset.py new file mode 100644 index 0000000..2c90e8b --- /dev/null +++ b/src/create_dataset.py @@ -0,0 +1,11 @@ +import pandas as pd + +tweets_path = "../raw_data/tweets.csv" + +tweets = pd.read_csv(tweets_path) + +# TODO extract mentions from tweets + +# TODO generate co-occurrence matrix + +# TODO save co-occurrence matrix diff --git a/src/create_id_list.py b/src/create_id_list.py new file mode 100644 index 0000000..5e9bcd7 --- /dev/null +++ b/src/create_id_list.py @@ -0,0 +1,8 @@ +import pandas as pd + +tweets_path = "../raw_data/tweets.csv" + +tweets = pd.read_csv(tweets_path) + +for id in tweets.id: + print(id)