{ "cells": [ { "cell_type": "code", "execution_count": 40, "id": "5eecbeeb", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from tqdm import tqdm  # progress display for pandas\n", "\n", "# Hook tqdm's progress display into pandas.\n", "tqdm.pandas()\n", "\n", "# Relative paths of the three copbird CSV tables.\n", "tweet_csv, entity_csv, user_csv = (\n", "    '../data/copbird_table_tweet.csv',\n", "    '../data/copbird_table_entity.csv',\n", "    '../data/copbird_table_user.csv',\n", ")\n", "\n", "# Row cap handed to read_csv as nrows; None applies no cap.\n", "limit = None\n", "tweets, entities, users = (\n", "    pd.read_csv(csv_path, nrows=limit)\n", "    for csv_path in (tweet_csv, entity_csv, user_csv)\n", ")" ] }, { "cell_type": "code", "execution_count": 41, "id": "1ad0f35a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "360008" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tweets.size" ] }, { "cell_type": "code", "execution_count": 42, "id": "c0a49030", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.8/dist-packages/pandas/core/strings/accessor.py:101: UserWarning: This pattern has match groups. To actually get the groups, use str.extract.\n", " return func(self, *args, **kwargs)\n" ] }, { "data": { "text/html": [ "
\n", " | id | \n", "tweet_text | \n", "created_at | \n", "user_id | \n", "like_count | \n", "retweet_count | \n", "reply_count | \n", "quote_count | \n", "contains | \n", "
---|---|---|---|---|---|---|---|---|---|
1 | \n", "1321023114071969792 | \n", "#Zeugengesucht\\nDie Hintergründe zu dem Tötung... | \n", "2020-10-27 09:37:08 | \n", "2397974054 | \n", "20.0 | \n", "24.0 | \n", "4.0 | \n", "1.0 | \n", "True | \n", "
2 | \n", "1321025127388188673 | \n", "RT @bka: EUROPE´S MOST WANTED – Sexualstraftät... | \n", "2020-10-27 09:45:08 | \n", "2397974054 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "True | \n", "