unifying handles and usernames

This commit is contained in:
Peter Kannewitz 2023-03-27 21:30:05 +02:00
parent 5816077c16
commit abe05ce248
4 changed files with 790 additions and 577 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
.jupyter/

View file

@ -12,7 +12,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"id": "9bd1686f-9bbc-4c05-a5f5-e0c4ce653fb2", "id": "9bd1686f-9bbc-4c05-a5f5-e0c4ce653fb2",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -37,7 +37,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 119, "execution_count": 117,
"id": "fcc48831-7999-4d79-b722-736715b1ced6", "id": "fcc48831-7999-4d79-b722-736715b1ced6",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -46,10 +46,10 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"((479991, 3), (151690, 8), (151690, 4), (13327, 5))" "((479991, 3), (151690, 8), (151690, 4), (13327, 3))"
] ]
}, },
"execution_count": 119, "execution_count": 117,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -79,6 +79,14 @@
" how = \"outer\",\n", " how = \"outer\",\n",
" suffixes = [\"_2021\", \"_2022\"])\n", " suffixes = [\"_2021\", \"_2022\"])\n",
"\n", "\n",
"# Some usernames corresponding to one user_id have changed overtime. For easier handling only the latest username and handle is kept\n",
"tweets_user = tweets_user.assign(handle = tweets_user.apply(lambda row: row['handle_2021'] if pd.isna(row['handle_2022']) else row['handle_2022'], axis=1),\n",
" user_name = tweets_user.apply(lambda row: row['user_name_2021'] if pd.isna(row['user_name_2022']) else row['user_name_2022'], axis=1)\n",
" ).drop(['handle_2021', 'handle_2022', 'user_name_2021', 'user_name_2022'], axis =1)\n",
"\n",
"police_stations = pd.read_csv(\"data/polizei_accounts_geo.csv\", sep = \"\\t\" # addiditional on police stations\n",
" ).rename(columns = {\"Polizei Account\": \"handle\"})\n",
"\n",
"tweets_meta.shape, tweets_statistics.shape, tweets_text.shape, tweets_user.shape" "tweets_meta.shape, tweets_statistics.shape, tweets_text.shape, tweets_user.shape"
] ]
}, },
@ -92,56 +100,39 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 150, "execution_count": 118,
"id": "cf409591-74a0-48dc-8f9e-66f7229f58cd", "id": "f30c2799-02c6-4e6a-ae36-9e039545b6b3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Merge like statistics, tweet text and user information in one data frame\n",
"tweets_combined = pd.merge(tweets_statistics, \n",
" tweets_text,\n",
" on = 'tweet_id').merge(tweets_user, on = 'user_id'\n",
" ).drop(['id'], axis = 1) # drop unascessary id column (redundant to index)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 119,
"id": "bd407aba-eec1-41ed-bff9-4c5fcdf6cb9d",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [ "outputs": [
{ {
"data": { "name": "stderr",
"text/plain": [ "output_type": "stream",
"tweet_id int64\n", "text": [
"like_count int64\n", "/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/IPython/lib/pretty.py:778: FutureWarning: In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.\n",
"retweet_count int64\n", " output = repr(obj)\n",
"reply_count int64\n", "/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.\n",
"quote_count int64\n", " return method()\n"
"measured_at object\n", ]
"is_deleted float64\n", },
"tweet_text object\n",
"created_at object\n",
"user_id int64\n",
"user_name_2021 object\n",
"handle_2021 object\n",
"handle_2022 object\n",
"user_name_2022 object\n",
"dtype: object"
]
},
"execution_count": 150,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tweets_combined = pd.merge(tweets_statistics, \n",
" tweets_text,\n",
" on = 'tweet_id').merge(tweets_user, on = 'user_id'\n",
" ).drop(['id'], axis = 1) # drop unascessary id column (redundant to index)\n",
" \n",
"# Convert Counts to integer values\n",
"tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(-99).astype(int)\n",
"tweets_combined.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "e312a975-3921-44ee-a7c5-37736678bc3f",
"metadata": {
"tags": []
},
"outputs": [
{ {
"data": { "data": {
"text/html": [ "text/html": [
@ -163,124 +154,273 @@
" <thead>\n", " <thead>\n",
" <tr style=\"text-align: right;\">\n", " <tr style=\"text-align: right;\">\n",
" <th></th>\n", " <th></th>\n",
" <th>tweet_id</th>\n",
" <th>like_count</th>\n",
" <th>retweet_count</th>\n",
" <th>reply_count</th>\n",
" <th>quote_count</th>\n",
" <th>measured_at</th>\n",
" <th>is_deleted</th>\n",
" <th>tweet_text</th>\n",
" <th>created_at</th>\n",
" <th>user_id</th>\n", " <th>user_id</th>\n",
" <th>handle</th>\n", " <th>handle</th>\n",
" <th>username</th>\n", " <th>user_name</th>\n",
" </tr>\n", " </tr>\n",
" </thead>\n", " </thead>\n",
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>0</th>\n", " <th>0</th>\n",
" <td>1000004686156652545</td>\n", " <td>1321021123463663616</td>\n",
" <td>6jannik9</td>\n", " <td>2</td>\n",
" <td>Systemstratege:</td>\n", " <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>@mahanna196 Da die Stadt keine Ausnahme für Ra...</td>\n",
" <td>2020-10-27 09:29:13</td>\n",
" <td>778895426007203840</td>\n",
" <td>polizei_ol</td>\n",
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1</th>\n", " <th>1</th>\n",
" <td>1000043230870867969</td>\n", " <td>1321037834246066181</td>\n",
" <td>lsollik</td>\n", " <td>2</td>\n",
" <td>Physiolucy</td>\n", " <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>@mahanna196 Ja. *sr</td>\n",
" <td>2020-10-27 10:35:38</td>\n",
" <td>778895426007203840</td>\n",
" <td>polizei_ol</td>\n",
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>2</th>\n", " <th>2</th>\n",
" <td>1000405847460151296</td>\n", " <td>1321068234955776000</td>\n",
" <td>achim1949hans</td>\n", " <td>19</td>\n",
" <td>Systemstratege:</td>\n", " <td>3</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>#Aktuell Auf dem ehem. Bundeswehrkrankenhausge...</td>\n",
" <td>2020-10-27 12:36:26</td>\n",
" <td>778895426007203840</td>\n",
" <td>polizei_ol</td>\n",
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>3</th>\n", " <th>3</th>\n",
" <td>1000460805719121921</td>\n", " <td>1321073940199100416</td>\n",
" <td>wahrew</td>\n", " <td>0</td>\n",
" <td>WahreWorte</td>\n", " <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>@Emma36166433 Bitte lesen Sie unseren Tweet 2/...</td>\n",
" <td>2020-10-27 12:59:06</td>\n",
" <td>778895426007203840</td>\n",
" <td>polizei_ol</td>\n",
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>4</th>\n", " <th>4</th>\n",
" <td>1000744009638252544</td>\n", " <td>1321088646506754049</td>\n",
" <td>derd1ck3</td>\n", " <td>2</td>\n",
" <td>Ⓓ①ⓒⓚ①③ (🏡)</td>\n", " <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>In der vergangenen Woche wurde die Wohnung des...</td>\n",
" <td>2020-10-27 13:57:32</td>\n",
" <td>778895426007203840</td>\n",
" <td>polizei_ol</td>\n",
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>...</th>\n", " <th>...</th>\n",
" <td>...</td>\n", " <td>...</td>\n",
" <td>...</td>\n", " <td>...</td>\n",
" <td>...</td>\n", " <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>11554</th>\n", " <th>151685</th>\n",
" <td>99931264</td>\n", " <td>1625828803804004354</td>\n",
" <td>havok1975</td>\n", " <td>5</td>\n",
" <td>Systemstratege:</td>\n", " <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2023-02-19 13:40:36</td>\n",
" <td>False</td>\n",
" <td>#Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 ...</td>\n",
" <td>2023-02-15 12:06:07</td>\n",
" <td>1168873095614160896</td>\n",
" <td>polizeiberlin_p</td>\n",
" <td>Polizei Berlin Prävention</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>11555</th>\n", " <th>151686</th>\n",
" <td>999542638226403328</td>\n", " <td>1628004105623900167</td>\n",
" <td>madame_de_saxe</td>\n", " <td>2</td>\n",
" <td>Systemstratege:</td>\n", " <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2023-02-25 13:14:49</td>\n",
" <td>False</td>\n",
" <td>Unser Präventionsteam vom #A44 berät heute und...</td>\n",
" <td>2023-02-21 12:10:00</td>\n",
" <td>1168873095614160896</td>\n",
" <td>polizeiberlin_p</td>\n",
" <td>Polizei Berlin Prävention</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>11556</th>\n", " <th>151687</th>\n",
" <td>999901133282754560</td>\n", " <td>1628004810183016448</td>\n",
" <td>tungstendie74</td>\n", " <td>6</td>\n",
" <td>Systemstratege:</td>\n", " <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2023-02-25 13:14:49</td>\n",
" <td>False</td>\n",
" <td>Auch unser #A52 war heute aktiv und hat zum Th...</td>\n",
" <td>2023-02-21 12:12:48</td>\n",
" <td>1168873095614160896</td>\n",
" <td>polizeiberlin_p</td>\n",
" <td>Polizei Berlin Prävention</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>11557</th>\n", " <th>151688</th>\n",
" <td>999904275080794112</td>\n", " <td>1628352896352878593</td>\n",
" <td>_danielheim</td>\n", " <td>2</td>\n",
" <td>Systemstratege:</td>\n", " <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2023-02-26 13:15:05</td>\n",
" <td>False</td>\n",
" <td>Gestern führte unser #A13 in einer Wohnsiedlun...</td>\n",
" <td>2023-02-22 11:15:58</td>\n",
" <td>1168873095614160896</td>\n",
" <td>polizeiberlin_p</td>\n",
" <td>Polizei Berlin Prävention</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>11558</th>\n", " <th>151689</th>\n",
" <td>999955376454930432</td>\n", " <td>1628709531998998529</td>\n",
" <td>amyman6010</td>\n", " <td>10</td>\n",
" <td>Systemstratege:</td>\n", " <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2023-02-27 12:17:33</td>\n",
" <td>False</td>\n",
" <td>Auf dem Gelände der @BUFAStudios (Oberlandstr....</td>\n",
" <td>2023-02-23 10:53:07</td>\n",
" <td>1168873095614160896</td>\n",
" <td>polizeiberlin_p</td>\n",
" <td>Polizei Berlin Prävention</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"<p>11559 rows × 3 columns</p>\n", "<p>151690 rows × 12 columns</p>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" user_id handle username\n", " tweet_id like_count retweet_count reply_count \\\n",
"0 1000004686156652545 6jannik9 Systemstratege: \n", "0 1321021123463663616 2 1 2 \n",
"1 1000043230870867969 lsollik Physiolucy\n", "1 1321037834246066181 2 0 0 \n",
"2 1000405847460151296 achim1949hans Systemstratege: \n", "2 1321068234955776000 19 3 3 \n",
"3 1000460805719121921 wahrew WahreWorte\n", "3 1321073940199100416 0 0 0 \n",
"4 1000744009638252544 derd1ck3 Ⓓ①ⓒⓚ①③ (🏡)\n", "4 1321088646506754049 2 0 0 \n",
"... ... ... ...\n", "... ... ... ... ... \n",
"11554 99931264 havok1975 Systemstratege: \n", "151685 1625828803804004354 5 1 1 \n",
"11555 999542638226403328 madame_de_saxe Systemstratege: \n", "151686 1628004105623900167 2 0 0 \n",
"11556 999901133282754560 tungstendie74 Systemstratege: \n", "151687 1628004810183016448 6 0 0 \n",
"11557 999904275080794112 _danielheim Systemstratege: \n", "151688 1628352896352878593 2 0 0 \n",
"11558 999955376454930432 amyman6010 Systemstratege: \n", "151689 1628709531998998529 10 1 0 \n",
"\n", "\n",
"[11559 rows x 3 columns]" " quote_count measured_at is_deleted \\\n",
"0 0 NaT NaN \n",
"1 0 NaT NaN \n",
"2 0 NaT NaN \n",
"3 0 NaT NaN \n",
"4 0 NaT NaN \n",
"... ... ... ... \n",
"151685 0 2023-02-19 13:40:36 False \n",
"151686 0 2023-02-25 13:14:49 False \n",
"151687 0 2023-02-25 13:14:49 False \n",
"151688 0 2023-02-26 13:15:05 False \n",
"151689 0 2023-02-27 12:17:33 False \n",
"\n",
" tweet_text created_at \\\n",
"0 @mahanna196 Da die Stadt keine Ausnahme für Ra... 2020-10-27 09:29:13 \n",
"1 @mahanna196 Ja. *sr 2020-10-27 10:35:38 \n",
"2 #Aktuell Auf dem ehem. Bundeswehrkrankenhausge... 2020-10-27 12:36:26 \n",
"3 @Emma36166433 Bitte lesen Sie unseren Tweet 2/... 2020-10-27 12:59:06 \n",
"4 In der vergangenen Woche wurde die Wohnung des... 2020-10-27 13:57:32 \n",
"... ... ... \n",
"151685 #Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 ... 2023-02-15 12:06:07 \n",
"151686 Unser Präventionsteam vom #A44 berät heute und... 2023-02-21 12:10:00 \n",
"151687 Auch unser #A52 war heute aktiv und hat zum Th... 2023-02-21 12:12:48 \n",
"151688 Gestern führte unser #A13 in einer Wohnsiedlun... 2023-02-22 11:15:58 \n",
"151689 Auf dem Gelände der @BUFAStudios (Oberlandstr.... 2023-02-23 10:53:07 \n",
"\n",
" user_id handle \\\n",
"0 778895426007203840 polizei_ol \n",
"1 778895426007203840 polizei_ol \n",
"2 778895426007203840 polizei_ol \n",
"3 778895426007203840 polizei_ol \n",
"4 778895426007203840 polizei_ol \n",
"... ... ... \n",
"151685 1168873095614160896 polizeiberlin_p \n",
"151686 1168873095614160896 polizeiberlin_p \n",
"151687 1168873095614160896 polizeiberlin_p \n",
"151688 1168873095614160896 polizeiberlin_p \n",
"151689 1168873095614160896 polizeiberlin_p \n",
"\n",
" user_name \n",
"0 Polizei Oldenburg-Stadt/Ammerland \n",
"1 Polizei Oldenburg-Stadt/Ammerland \n",
"2 Polizei Oldenburg-Stadt/Ammerland \n",
"3 Polizei Oldenburg-Stadt/Ammerland \n",
"4 Polizei Oldenburg-Stadt/Ammerland \n",
"... ... \n",
"151685 Polizei Berlin Prävention \n",
"151686 Polizei Berlin Prävention \n",
"151687 Polizei Berlin Prävention \n",
"151688 Polizei Berlin Prävention \n",
"151689 Polizei Berlin Prävention \n",
"\n",
"[151690 rows x 12 columns]"
] ]
}, },
"execution_count": 44, "execution_count": 119,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"tweets_meta = pd.read_csv(\"data/tweets.csv\")\n", "# Convert Counts to integer values\n",
"tweets_time = pd.read_csv(\"data/tweets-1679742620302.csv\")\n", "tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(-99).astype(int)\n",
"tweets_text = pd.read_csv(\"data/tweets-1679742698645.csv\")\n",
"tweets_user = pd.read_csv(\"data/tweets-1679742702794.csv\"\n",
" ).rename(columns = {\"username\":\"handle\", # rename columns\n",
" \"handle\": \"username\"})\n",
"tweets_user = tweets_user.assign(handle = tweets_user['handle'].str.lower()) # convert handles to lower case\n",
"tweets_combined = pd.merge(tweets_time, # merge the two tweet related data frames\n",
" tweets_text, \n",
" how = 'inner', \n",
" on = 'tweet_id'\n",
" ).drop(['id'], # drop unascessary id column (redundant to index)\n",
" axis = 1)\n",
"tweets_combined = tweets_combined.assign(measured_at = pd.to_datetime(tweets_combined['measured_at']), # change date to date format\n", "tweets_combined = tweets_combined.assign(measured_at = pd.to_datetime(tweets_combined['measured_at']), # change date to date format\n",
" created_at = pd.to_datetime(tweets_combined['created_at']))\n", " created_at = pd.to_datetime(tweets_combined['created_at']),\n",
"police_stations = pd.read_csv(\"data/polizei_accounts_geo.csv\", sep = \"\\t\" # addiditional on police stations\n", " handle = tweets_combined['handle'].str.lower(),\n",
" ).rename(columns = {\"Polizei Account\": \"handle\"})\n", " is_deleted = tweets_combined['is_deleted'].map(lambda x: False if x == 0.0 else ( True if x == 1.0 else np.nan)))\n",
"tweets_user" "tweets_combined"
] ]
}, },
{ {
@ -337,7 +477,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 112,
"id": "0e5eb455-6b12-4572-8f5e-f328a94bd797", "id": "0e5eb455-6b12-4572-8f5e-f328a94bd797",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -346,13 +486,13 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"hashtag 157145\n", "hashtag 267255\n",
"url 88322\n", "url 141594\n",
"mention 36815\n", "mention 71142\n",
"Name: entity_type, dtype: int64" "Name: entity_type, dtype: int64"
] ]
}, },
"execution_count": 7, "execution_count": 112,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -369,12 +509,12 @@
"tags": [] "tags": []
}, },
"source": [ "source": [
"Insgesamt haben wir 84794 einzigartige Tweets:" "Insgesamt haben wir 151690 einzigartige Tweets:"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 113,
"id": "5a438e7f-8735-40bb-b450-2ce168f0f67a", "id": "5a438e7f-8735-40bb-b450-2ce168f0f67a",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -383,10 +523,10 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"84794" "151690"
] ]
}, },
"execution_count": 8, "execution_count": 113,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -397,7 +537,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 114,
"id": "4f1e8c6c-3610-436e-899e-4d0307259230", "id": "4f1e8c6c-3610-436e-899e-4d0307259230",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -407,12 +547,12 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Die Tweets wurden vom 2022-02-24 bis zum: 2023-03-16 gesammelt. Also genau insgesamt: 384 Tage.\n" "Die Tweets wurden vom 2020-10-27 bis zum: 2023-03-16 gesammelt. Also genau insgesamt: 870 Tage. (Mit kleinen Unterbrechungen)\n"
] ]
} }
], ],
"source": [ "source": [
"print(\"Die Tweets wurden vom \", tweets_combined['created_at'].min().date(), \"bis zum:\", tweets_combined['created_at'].max().date(), \"gesammelt.\", \"Also genau insgesamt:\", (tweets_combined['created_at'].max() - tweets_combined['created_at'].min()).days, \"Tage.\")\n", "print(\"Die Tweets wurden vom \", tweets_combined['created_at'].min().date(), \"bis zum:\", tweets_combined['created_at'].max().date(), \"gesammelt.\", \"Also genau insgesamt:\", (tweets_combined['created_at'].max() - tweets_combined['created_at'].min()).days, \"Tage. (Mit kleinen Unterbrechungen)\")\n",
"# tweets_combined[tweets_combined['created_at'] == tweets_combined['created_at'].max()] # Tweets vom letzten Tag" "# tweets_combined[tweets_combined['created_at'] == tweets_combined['created_at'].max()] # Tweets vom letzten Tag"
] ]
}, },
@ -428,9 +568,11 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 43, "execution_count": 122,
"id": "9373552e-6baf-46df-ae16-c63603e20a83", "id": "9373552e-6baf-46df-ae16-c63603e20a83",
"metadata": {}, "metadata": {
"tags": []
},
"outputs": [ "outputs": [
{ {
"data": { "data": {
@ -467,7 +609,7 @@
" <tr>\n", " <tr>\n",
" <th>11</th>\n", " <th>11</th>\n",
" <td>polizei_ffm</td>\n", " <td>polizei_ffm</td>\n",
" <td>2993</td>\n", " <td>5512</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -476,20 +618,9 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>3</th>\n",
" <td>polizei_nrw_do</td>\n",
" <td>2860</td>\n",
" <td>Polizei NRW DO</td>\n",
" <td>Polizei</td>\n",
" <td>Nordrhein-Westfalen</td>\n",
" <td>Dortmund</td>\n",
" <td>51.5142273</td>\n",
" <td>7.4652789</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n", " <th>0</th>\n",
" <td>polizeisachsen</td>\n", " <td>polizeisachsen</td>\n",
" <td>2700</td>\n", " <td>5340</td>\n",
" <td>Polizei Sachsen</td>\n", " <td>Polizei Sachsen</td>\n",
" <td>Polizei</td>\n", " <td>Polizei</td>\n",
" <td>Sachsen</td>\n", " <td>Sachsen</td>\n",
@ -498,9 +629,20 @@
" <td>13.7381437</td>\n", " <td>13.7381437</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>91</th>\n", " <th>3</th>\n",
" <td>polizei_nrw_do</td>\n",
" <td>4895</td>\n",
" <td>Polizei NRW DO</td>\n",
" <td>Polizei</td>\n",
" <td>Nordrhein-Westfalen</td>\n",
" <td>Dortmund</td>\n",
" <td>51.5142273</td>\n",
" <td>7.4652789</td>\n",
" </tr>\n",
" <tr>\n",
" <th>92</th>\n",
" <td>polizeibb</td>\n", " <td>polizeibb</td>\n",
" <td>2310</td>\n", " <td>4323</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -511,7 +653,7 @@
" <tr>\n", " <tr>\n",
" <th>61</th>\n", " <th>61</th>\n",
" <td>polizeihamburg</td>\n", " <td>polizeihamburg</td>\n",
" <td>2093</td>\n", " <td>4042</td>\n",
" <td>Polizei Hamburg</td>\n", " <td>Polizei Hamburg</td>\n",
" <td>Polizei</td>\n", " <td>Polizei</td>\n",
" <td>Hamburg</td>\n", " <td>Hamburg</td>\n",
@ -525,35 +667,32 @@
], ],
"text/plain": [ "text/plain": [
" handle count Name Typ Bundesland \\\n", " handle count Name Typ Bundesland \\\n",
"11 polizei_ffm 2993 NaN NaN NaN \n", "11 polizei_ffm 5512 NaN NaN NaN \n",
"3 polizei_nrw_do 2860 Polizei NRW DO Polizei Nordrhein-Westfalen \n", "0 polizeisachsen 5340 Polizei Sachsen Polizei Sachsen \n",
"0 polizeisachsen 2700 Polizei Sachsen Polizei Sachsen \n", "3 polizei_nrw_do 4895 Polizei NRW DO Polizei Nordrhein-Westfalen \n",
"91 polizeibb 2310 NaN NaN NaN \n", "92 polizeibb 4323 NaN NaN NaN \n",
"61 polizeihamburg 2093 Polizei Hamburg Polizei Hamburg \n", "61 polizeihamburg 4042 Polizei Hamburg Polizei Hamburg \n",
"\n", "\n",
" Stadt LAT LONG \n", " Stadt LAT LONG \n",
"11 NaN NaN NaN \n", "11 NaN NaN NaN \n",
"3 Dortmund 51.5142273 7.4652789 \n",
"0 Dresden 51.0493286 13.7381437 \n", "0 Dresden 51.0493286 13.7381437 \n",
"91 NaN NaN NaN \n", "3 Dortmund 51.5142273 7.4652789 \n",
"92 NaN NaN NaN \n",
"61 Hamburg 53.550341 10.000654 " "61 Hamburg 53.550341 10.000654 "
] ]
}, },
"execution_count": 43, "execution_count": 122,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"tweets_agg = tweets_combined.merge(tweets_user,\n", "tweets_agg = tweets_combined.groupby(by = [\"user_id\", \"user_name\", \"handle\"]\n",
" on = \"user_id\"\n", " )[\"user_id\"].aggregate(['count']\n",
" ).groupby(by = [\"user_id\", \"handle\", \"username\"]\n", " ).merge(police_stations,\n",
" )[\"user_id\"].aggregate(['count']\n", " on = \"handle\",\n",
" ).merge(police_stations, \n", " how = \"left\"\n",
" on = \"handle\",\n", " ).sort_values(['count'], ascending=False)\n",
" how = \"left\"\n",
" ).sort_values(['count'], \n",
" ascending=False)\n",
"tweets_agg.shape\n", "tweets_agg.shape\n",
"activy_police_vis = tweets_agg[0:50]\n", "activy_police_vis = tweets_agg[0:50]\n",
"activy_police_vis.head()" "activy_police_vis.head()"
@ -571,23 +710,31 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 47, "execution_count": 123,
"id": "b1c39196-d1cc-4f82-8e01-7529e7b3046f", "id": "b1c39196-d1cc-4f82-8e01-7529e7b3046f",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [ "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n",
" for col_name, dtype in df.dtypes.iteritems():\n"
]
},
{ {
"data": { "data": {
"text/html": [ "text/html": [
"\n", "\n",
"<div id=\"altair-viz-a660bd38b72240eaae654b5e471932a6\"></div>\n", "<div id=\"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\"></div>\n",
"<script type=\"text/javascript\">\n", "<script type=\"text/javascript\">\n",
" var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n", " var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
" (function(spec, embedOpt){\n", " (function(spec, embedOpt){\n",
" let outputDiv = document.currentScript.previousElementSibling;\n", " let outputDiv = document.currentScript.previousElementSibling;\n",
" if (outputDiv.id !== \"altair-viz-a660bd38b72240eaae654b5e471932a6\") {\n", " if (outputDiv.id !== \"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\") {\n",
" outputDiv = document.getElementById(\"altair-viz-a660bd38b72240eaae654b5e471932a6\");\n", " outputDiv = document.getElementById(\"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\");\n",
" }\n", " }\n",
" const paths = {\n", " const paths = {\n",
" \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n", " \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
@ -633,14 +780,14 @@
" .catch(showError)\n", " .catch(showError)\n",
" .then(() => displayChart(vegaEmbed));\n", " .then(() => displayChart(vegaEmbed));\n",
" }\n", " }\n",
" })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-da2bacd5b3a57271f77be4dc435a345f\"}, \"mark\": \"bar\", \"encoding\": {\"x\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"handle\", \"sort\": \"-x\", \"type\": \"nominal\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-da2bacd5b3a57271f77be4dc435a345f\": [{\"handle\": \"polizei_ffm\", \"count\": 2993, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_nrw_do\", \"count\": 2860, \"Name\": \"Polizei NRW DO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Dortmund\", \"LAT\": \"51.5142273\", \"LONG\": \"7.4652789\"}, {\"handle\": \"polizeisachsen\", \"count\": 2700, \"Name\": \"Polizei Sachsen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen\", \"Stadt\": \"Dresden\", \"LAT\": \"51.0493286\", \"LONG\": \"13.7381437\"}, {\"handle\": \"polizeibb\", \"count\": 2310, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeihamburg\", \"count\": 2093, \"Name\": \"Polizei Hamburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Hamburg\", \"Stadt\": \"Hamburg\", \"LAT\": \"53.550341\", \"LONG\": \"10.000654\"}, {\"handle\": \"polizeimuenchen\", \"count\": 2021, \"Name\": \"Polizei M\\u00fcnchen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"M\\u00fcnchen\", \"LAT\": \"48.135125\", \"LONG\": \"11.581981\"}, {\"handle\": \"polizeimfr\", \"count\": 1892, \"Name\": \"Polizei Mittelfranken\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"N\\u00fcrnberg\", \"LAT\": \"49.453872\", \"LONG\": \"11.077298\"}, {\"handle\": \"polizeimannheim\", \"count\": 1835, \"Name\": \"Polizei Mannheim\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Mannheim\", \"LAT\": \"49.4892913\", \"LONG\": \"8.4673098\"}, {\"handle\": \"polizei_nrw_bi\", \"count\": 1794, \"Name\": \"Polizei NRW BI\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bielefeld\", \"LAT\": \"52.0191005\", \"LONG\": \"8.531007\"}, {\"handle\": \"polizei_nrw_k\", \"count\": 1540, \"Name\": \"Polizei NRW K\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"K\\u00f6ln\", \"LAT\": \"50.938361\", \"LONG\": \"6.959974\"}, {\"handle\": \"bremenpolizei\", \"count\": 1417, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_kl\", \"count\": 1380, \"Name\": \"Polizei Kaiserslautern\", \"Typ\": \"Polizei\", \"Bundesland\": \"Rheinland-Pfalz\", \"Stadt\": \"Kaiserslautern\", \"LAT\": \"49.4432174\", \"LONG\": \"7.7689951\"}, {\"handle\": \"polizei_md\", \"count\": 1365, \"Name\": \"Polizei Magdeburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen-Anhalt\", \"Stadt\": \"Magdeburg\", \"LAT\": \"52.1315889\", \"LONG\": \"11.6399609\"}, {\"handle\": \"polizei_ka\", \"count\": 1356, \"Name\": \"Polizei Karlsruhe\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Karlsruhe\", \"LAT\": \"49.0068705\", \"LONG\": \"8.4034195\"}, {\"handle\": \"polizeiberlin\", \"count\": 1351, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}]}}, {\"mode\": \"vega-lite\"});\n", " })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-59538db49feb940cb722f8834432bfab\"}, \"mark\": \"bar\", \"encoding\": {\"x\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"handle\", \"sort\": \"-x\", \"type\": \"nominal\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-59538db49feb940cb722f8834432bfab\": [{\"handle\": \"polizei_ffm\", \"count\": 5512, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeisachsen\", \"count\": 5340, \"Name\": \"Polizei Sachsen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen\", \"Stadt\": \"Dresden\", \"LAT\": \"51.0493286\", \"LONG\": \"13.7381437\"}, {\"handle\": \"polizei_nrw_do\", \"count\": 4895, \"Name\": \"Polizei NRW DO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Dortmund\", \"LAT\": \"51.5142273\", \"LONG\": \"7.4652789\"}, {\"handle\": \"polizeibb\", \"count\": 4323, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeihamburg\", \"count\": 4042, \"Name\": \"Polizei Hamburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Hamburg\", \"Stadt\": \"Hamburg\", \"LAT\": \"53.550341\", \"LONG\": \"10.000654\"}, {\"handle\": \"polizeimuenchen\", \"count\": 3951, \"Name\": \"Polizei M\\u00fcnchen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"M\\u00fcnchen\", \"LAT\": \"48.135125\", \"LONG\": \"11.581981\"}, {\"handle\": \"polizeimfr\", \"count\": 3317, \"Name\": \"Polizei Mittelfranken\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"N\\u00fcrnberg\", \"LAT\": \"49.453872\", \"LONG\": \"11.077298\"}, {\"handle\": \"polizeimannheim\", \"count\": 3300, \"Name\": \"Polizei Mannheim\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Mannheim\", \"LAT\": \"49.4892913\", \"LONG\": \"8.4673098\"}, {\"handle\": \"bremenpolizei\", \"count\": 2664, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_ka\", \"count\": 2568, \"Name\": \"Polizei Karlsruhe\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Karlsruhe\", \"LAT\": \"49.0068705\", \"LONG\": \"8.4034195\"}, {\"handle\": \"polizei_nrw_k\", \"count\": 2544, \"Name\": \"Polizei NRW K\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"K\\u00f6ln\", \"LAT\": \"50.938361\", \"LONG\": \"6.959974\"}, {\"handle\": \"polizei_nrw_bo\", \"count\": 2367, \"Name\": \"Polizei NRW BO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bochum\", \"LAT\": \"51.4818111\", \"LONG\": \"7.2196635\"}, {\"handle\": \"polizei_md\", \"count\": 2319, \"Name\": \"Polizei Magdeburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen-Anhalt\", \"Stadt\": \"Magdeburg\", \"LAT\": \"52.1315889\", \"LONG\": \"11.6399609\"}, {\"handle\": \"polizei_h\", \"count\": 2302, \"Name\": \"Polizei Hannover\", \"Typ\": \"Polizei\", \"Bundesland\": \"Niedersachsen\", \"Stadt\": \"Hannover\", \"LAT\": \"52.3744779\", \"LONG\": \"9.7385532\"}, {\"handle\": \"polizei_nrw_bi\", \"count\": 2299, \"Name\": \"Polizei NRW BI\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bielefeld\", \"LAT\": \"52.0191005\", \"LONG\": \"8.531007\"}]}}, {\"mode\": \"vega-lite\"});\n",
"</script>" "</script>"
], ],
"text/plain": [ "text/plain": [
"alt.Chart(...)" "alt.Chart(...)"
] ]
}, },
"execution_count": 47, "execution_count": 123,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -665,7 +812,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 90, "execution_count": 125,
"id": "d0549250-b11f-4762-8500-1134c53303b4", "id": "d0549250-b11f-4762-8500-1134c53303b4",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -674,32 +821,29 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"0 Die Gewalt, die unsere Kolleginnen &amp; Kollegen in der Silvesternacht erleben mussten, ist une...\n", "0 Die Gewalt, die unsere Kolleginnen &amp; Kollegen in der Silvesternacht erleben mussten, ist une...\n",
"1 WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...\n", "1 An diejenigen, die vergangene Nacht in eine Schule in #Gesundbrunnen eingebrochen sind und 242 T...\n",
"2 Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...\n", "2 WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...\n",
"3 Auf unserem #A45 in #lichterfelde) befindet sich gerade diese Fundhündin. Sie wurde am Hindenbur...\n", "3 Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...\n",
"4 @nexta_tv Wir haben das Video gesichert und leiten den Sachverhalt an die zuständigen Kolleginne...\n", "4 Weil wir dich schieben! @BVG_Kampagne 😉 https://t.co/N8kdlCxhz2\n",
" ... \n", " ... \n",
"84789 #Polizeimeldungen #Tagesticker\\n \\nAnhalt-Bitterfeld\\nhttps://t.co/tNLEzztL1o\\n \\nDessau-Roßlau\\...\n", "151685 Sinken die Temperaturen ❄, steigt zeitgleich das Risiko für Verkehrsteilnehmer. Höchste Zeit zu ...\n",
"84790 Am Mittwoch erhielten wir mehrere Anrufe über einen auffälligen Pkw-Fahrer (Reifen quietschen un...\n", "151686 📺Am Sonntag, um 19:50 Uhr, geht es bei #KripoLive im \\n@mdrde\\n auch um die Fahndung nach einem ...\n",
"84791 @Jonas5Luisa Kleiner Pro-Tipp von uns: Einfach mal auf den link klicken! ;)*cl\n", "151687 Musik verbindet!\\nUnser #Adventskalender der #Bundespolizei startet morgen ➡ https://t.co/V6CaTV...\n",
"84792 Vermisstensuche nach 27-Jährigem aus Bendorf-Mühlhofen: Wer hat Tobias Wißmann gesehen? Ein Foto...\n", "151688 @gretchen_hann Hallo, diese Frage kann die Bundespolizei Spezialkräfte besser beantworten. Richt...\n",
"84793 #PolizeiNRW #Köln #Leverkusen : XXX - Infos unter https://t.co/SeWShP2tZE https://t.co/Kopy7w8W3B\n", "151689 #Bönen #Holzwickede - Verstöße gegen Coronaschutzverordnung: Polizei löst Gaststättenabend und F...\n",
"Name: tweet_text, Length: 84794, dtype: object" "Name: tweet_text, Length: 151690, dtype: object"
] ]
}, },
"execution_count": 90, "execution_count": 125,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"tweets_attention = tweets_combined.merge(tweets_user,\n", "tweets_attention = tweets_combined.merge(police_stations,\n",
" on = \"user_id\",\n", " on = \"handle\",\n",
" how = \"left\"\n", " how = \"left\")\n",
" ).merge(police_stations,\n",
" on = \"handle\",\n",
" how = \"left\")\n",
"pd.options.display.max_colwidth = 100\n", "pd.options.display.max_colwidth = 100\n",
"tweets_attention.sort_values('like_count', ascending = False).reset_index()['tweet_text']\n", "tweets_attention.sort_values('like_count', ascending = False).reset_index()['tweet_text']\n",
"\n" "\n"
@ -841,7 +985,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 148, "execution_count": 121,
"id": "ed86b45e-9dd8-436d-9c96-15500ed93985", "id": "ed86b45e-9dd8-436d-9c96-15500ed93985",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -868,142 +1012,104 @@
" <thead>\n", " <thead>\n",
" <tr style=\"text-align: right;\">\n", " <tr style=\"text-align: right;\">\n",
" <th></th>\n", " <th></th>\n",
" <th>like_count</th>\n", " <th></th>\n",
" <th>retweet_count</th>\n", " <th>count</th>\n",
" <th>reply_count</th>\n", " </tr>\n",
" <th>quote_count</th>\n", " <tr>\n",
" <th>user_id</th>\n",
" <th>user_name</th>\n",
" <th></th>\n",
" </tr>\n", " </tr>\n",
" </thead>\n", " </thead>\n",
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>0</th>\n", " <th>223758384</th>\n",
" <td>2</td>\n", " <th>Polizei Sachsen</th>\n",
" <td>1</td>\n", " <td>5340</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1</th>\n", " <th>259607457</th>\n",
" <td>2</td>\n", " <th>Polizei NRW K</th>\n",
" <td>0</td>\n", " <td>2544</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>2</th>\n", " <th>424895827</th>\n",
" <td>19</td>\n", " <th>Polizei Stuttgart</th>\n",
" <td>3</td>\n", " <td>1913</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>3</th>\n", " <th>769128278</th>\n",
" <td>0</td>\n", " <th>Polizei NRW DO</th>\n",
" <td>0</td>\n", " <td>4895</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>4</th>\n", " <th>775664780</th>\n",
" <td>2</td>\n", " <th>Polizei Rostock</th>\n",
" <td>0</td>\n", " <td>604</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>...</th>\n", " <th>...</th>\n",
" <td>...</td>\n", " <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n", " <td>...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>151685</th>\n", " <th>1169206134189830145</th>\n",
" <td>5</td>\n", " <th>Polizei Stendal</th>\n",
" <td>1</td>\n", " <td>842</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>151686</th>\n", " <th>1184022676488314880</th>\n",
" <td>2</td>\n", " <th>Polizei Pforzheim</th>\n",
" <td>0</td>\n", " <td>283</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>151687</th>\n", " <th>1184024283342950401</th>\n",
" <td>6</td>\n", " <th>Polizei Ravensburg</th>\n",
" <td>0</td>\n", " <td>460</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>151688</th>\n", " <th>1232548941889228808</th>\n",
" <td>2</td>\n", " <th>Systemstratege:</th>\n",
" <td>0</td>\n", " <td>168</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>151689</th>\n", " <th>1295978598034284546</th>\n",
" <td>10</td>\n", " <th>Polizei ZPD NI</th>\n",
" <td>1</td>\n", " <td>133</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"<p>151690 rows × 4 columns</p>\n", "<p>163 rows × 1 columns</p>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" like_count retweet_count reply_count quote_count\n", " count\n",
"0 2 1 2 0\n", "user_id user_name \n",
"1 2 0 0 0\n", "223758384 Polizei Sachsen 5340\n",
"2 19 3 3 0\n", "259607457 Polizei NRW K 2544\n",
"3 0 0 0 0\n", "424895827 Polizei Stuttgart 1913\n",
"4 2 0 0 0\n", "769128278 Polizei NRW DO 4895\n",
"... ... ... ... ...\n", "775664780 Polizei Rostock 604\n",
"151685 5 1 1 0\n", "... ...\n",
"151686 2 0 0 0\n", "1169206134189830145 Polizei Stendal 842\n",
"151687 6 0 0 0\n", "1184022676488314880 Polizei Pforzheim 283\n",
"151688 2 0 0 0\n", "1184024283342950401 Polizei Ravensburg 460\n",
"151689 10 1 0 0\n", "1232548941889228808 Systemstratege: 168\n",
"1295978598034284546 Polizei ZPD NI 133\n",
"\n", "\n",
"[151690 rows x 4 columns]" "[163 rows x 1 columns]"
] ]
}, },
"execution_count": 148, "execution_count": 121,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [] "source": [
}, "tweets_combined.groupby(by = [\"user_id\", \"user_name\"]\n",
{ " )[\"user_id\"].aggregate(['count']\n",
"cell_type": "code", " )"
"execution_count": 142, ]
"id": "dac4e5fc-22ca-466d-bc3c-586e68696d03",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"like_count\n",
"False 147573\n",
"True 4117\n",
"dtype: int64"
]
},
"execution_count": 142,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
} }
], ],
"metadata": { "metadata": {

View file

@ -1 +1 @@
{"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":0,"widgets":[]}},"down":{"size":0,"widgets":[]},"left":{"collapsed":false,"current":"filebrowser","widgets":["filebrowser","running-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector","debugger-sidebar"]},"relativeSizes":[0.2676740420939018,0.7323259579060982,0]},"file-browser-filebrowser:cwd":{"path":"data"}},"metadata":{"id":"default"}} {"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":1,"widgets":["notebook:zusammenfassung.ipynb"]},"current":"notebook:zusammenfassung.ipynb"},"down":{"size":0,"widgets":[]},"left":{"collapsed":false,"current":"filebrowser","widgets":["filebrowser","running-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector","debugger-sidebar"]},"relativeSizes":[0.17943235504652827,0.8205676449534718,0]},"file-browser-filebrowser:cwd":{"path":""},"notebook:zusammenfassung.ipynb":{"data":{"path":"zusammenfassung.ipynb","factory":"Notebook"}}},"metadata":{"id":"default"}}

View file

@ -12,7 +12,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"id": "9bd1686f-9bbc-4c05-a5f5-e0c4ce653fb2", "id": "9bd1686f-9bbc-4c05-a5f5-e0c4ce653fb2",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -37,7 +37,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 119, "execution_count": 117,
"id": "fcc48831-7999-4d79-b722-736715b1ced6", "id": "fcc48831-7999-4d79-b722-736715b1ced6",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -46,10 +46,10 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"((479991, 3), (151690, 8), (151690, 4), (13327, 5))" "((479991, 3), (151690, 8), (151690, 4), (13327, 3))"
] ]
}, },
"execution_count": 119, "execution_count": 117,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -79,6 +79,14 @@
" how = \"outer\",\n", " how = \"outer\",\n",
" suffixes = [\"_2021\", \"_2022\"])\n", " suffixes = [\"_2021\", \"_2022\"])\n",
"\n", "\n",
"# Some usernames corresponding to one user_id have changed overtime. For easier handling only the latest username and handle is kept\n",
"tweets_user = tweets_user.assign(handle = tweets_user.apply(lambda row: row['handle_2021'] if pd.isna(row['handle_2022']) else row['handle_2022'], axis=1),\n",
" user_name = tweets_user.apply(lambda row: row['user_name_2021'] if pd.isna(row['user_name_2022']) else row['user_name_2022'], axis=1)\n",
" ).drop(['handle_2021', 'handle_2022', 'user_name_2021', 'user_name_2022'], axis =1)\n",
"\n",
"police_stations = pd.read_csv(\"data/polizei_accounts_geo.csv\", sep = \"\\t\" # addiditional on police stations\n",
" ).rename(columns = {\"Polizei Account\": \"handle\"})\n",
"\n",
"tweets_meta.shape, tweets_statistics.shape, tweets_text.shape, tweets_user.shape" "tweets_meta.shape, tweets_statistics.shape, tweets_text.shape, tweets_user.shape"
] ]
}, },
@ -92,56 +100,39 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 150, "execution_count": 118,
"id": "cf409591-74a0-48dc-8f9e-66f7229f58cd", "id": "f30c2799-02c6-4e6a-ae36-9e039545b6b3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Merge like statistics, tweet text and user information in one data frame\n",
"tweets_combined = pd.merge(tweets_statistics, \n",
" tweets_text,\n",
" on = 'tweet_id').merge(tweets_user, on = 'user_id'\n",
" ).drop(['id'], axis = 1) # drop unascessary id column (redundant to index)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 119,
"id": "bd407aba-eec1-41ed-bff9-4c5fcdf6cb9d",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [ "outputs": [
{ {
"data": { "name": "stderr",
"text/plain": [ "output_type": "stream",
"tweet_id int64\n", "text": [
"like_count int64\n", "/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/IPython/lib/pretty.py:778: FutureWarning: In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.\n",
"retweet_count int64\n", " output = repr(obj)\n",
"reply_count int64\n", "/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.\n",
"quote_count int64\n", " return method()\n"
"measured_at object\n", ]
"is_deleted float64\n", },
"tweet_text object\n",
"created_at object\n",
"user_id int64\n",
"user_name_2021 object\n",
"handle_2021 object\n",
"handle_2022 object\n",
"user_name_2022 object\n",
"dtype: object"
]
},
"execution_count": 150,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tweets_combined = pd.merge(tweets_statistics, \n",
" tweets_text,\n",
" on = 'tweet_id').merge(tweets_user, on = 'user_id'\n",
" ).drop(['id'], axis = 1) # drop unascessary id column (redundant to index)\n",
" \n",
"# Convert Counts to integer values\n",
"tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(-99).astype(int)\n",
"tweets_combined.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "e312a975-3921-44ee-a7c5-37736678bc3f",
"metadata": {
"tags": []
},
"outputs": [
{ {
"data": { "data": {
"text/html": [ "text/html": [
@ -163,124 +154,273 @@
" <thead>\n", " <thead>\n",
" <tr style=\"text-align: right;\">\n", " <tr style=\"text-align: right;\">\n",
" <th></th>\n", " <th></th>\n",
" <th>tweet_id</th>\n",
" <th>like_count</th>\n",
" <th>retweet_count</th>\n",
" <th>reply_count</th>\n",
" <th>quote_count</th>\n",
" <th>measured_at</th>\n",
" <th>is_deleted</th>\n",
" <th>tweet_text</th>\n",
" <th>created_at</th>\n",
" <th>user_id</th>\n", " <th>user_id</th>\n",
" <th>handle</th>\n", " <th>handle</th>\n",
" <th>username</th>\n", " <th>user_name</th>\n",
" </tr>\n", " </tr>\n",
" </thead>\n", " </thead>\n",
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>0</th>\n", " <th>0</th>\n",
" <td>1000004686156652545</td>\n", " <td>1321021123463663616</td>\n",
" <td>6jannik9</td>\n", " <td>2</td>\n",
" <td>Systemstratege:</td>\n", " <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>@mahanna196 Da die Stadt keine Ausnahme für Ra...</td>\n",
" <td>2020-10-27 09:29:13</td>\n",
" <td>778895426007203840</td>\n",
" <td>polizei_ol</td>\n",
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1</th>\n", " <th>1</th>\n",
" <td>1000043230870867969</td>\n", " <td>1321037834246066181</td>\n",
" <td>lsollik</td>\n", " <td>2</td>\n",
" <td>Physiolucy</td>\n", " <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>@mahanna196 Ja. *sr</td>\n",
" <td>2020-10-27 10:35:38</td>\n",
" <td>778895426007203840</td>\n",
" <td>polizei_ol</td>\n",
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>2</th>\n", " <th>2</th>\n",
" <td>1000405847460151296</td>\n", " <td>1321068234955776000</td>\n",
" <td>achim1949hans</td>\n", " <td>19</td>\n",
" <td>Systemstratege:</td>\n", " <td>3</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>#Aktuell Auf dem ehem. Bundeswehrkrankenhausge...</td>\n",
" <td>2020-10-27 12:36:26</td>\n",
" <td>778895426007203840</td>\n",
" <td>polizei_ol</td>\n",
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>3</th>\n", " <th>3</th>\n",
" <td>1000460805719121921</td>\n", " <td>1321073940199100416</td>\n",
" <td>wahrew</td>\n", " <td>0</td>\n",
" <td>WahreWorte</td>\n", " <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>@Emma36166433 Bitte lesen Sie unseren Tweet 2/...</td>\n",
" <td>2020-10-27 12:59:06</td>\n",
" <td>778895426007203840</td>\n",
" <td>polizei_ol</td>\n",
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>4</th>\n", " <th>4</th>\n",
" <td>1000744009638252544</td>\n", " <td>1321088646506754049</td>\n",
" <td>derd1ck3</td>\n", " <td>2</td>\n",
" <td>Ⓓ①ⓒⓚ①③ (🏡)</td>\n", " <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>In der vergangenen Woche wurde die Wohnung des...</td>\n",
" <td>2020-10-27 13:57:32</td>\n",
" <td>778895426007203840</td>\n",
" <td>polizei_ol</td>\n",
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>...</th>\n", " <th>...</th>\n",
" <td>...</td>\n", " <td>...</td>\n",
" <td>...</td>\n", " <td>...</td>\n",
" <td>...</td>\n", " <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>11554</th>\n", " <th>151685</th>\n",
" <td>99931264</td>\n", " <td>1625828803804004354</td>\n",
" <td>havok1975</td>\n", " <td>5</td>\n",
" <td>Systemstratege:</td>\n", " <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2023-02-19 13:40:36</td>\n",
" <td>False</td>\n",
" <td>#Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 ...</td>\n",
" <td>2023-02-15 12:06:07</td>\n",
" <td>1168873095614160896</td>\n",
" <td>polizeiberlin_p</td>\n",
" <td>Polizei Berlin Prävention</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>11555</th>\n", " <th>151686</th>\n",
" <td>999542638226403328</td>\n", " <td>1628004105623900167</td>\n",
" <td>madame_de_saxe</td>\n", " <td>2</td>\n",
" <td>Systemstratege:</td>\n", " <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2023-02-25 13:14:49</td>\n",
" <td>False</td>\n",
" <td>Unser Präventionsteam vom #A44 berät heute und...</td>\n",
" <td>2023-02-21 12:10:00</td>\n",
" <td>1168873095614160896</td>\n",
" <td>polizeiberlin_p</td>\n",
" <td>Polizei Berlin Prävention</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>11556</th>\n", " <th>151687</th>\n",
" <td>999901133282754560</td>\n", " <td>1628004810183016448</td>\n",
" <td>tungstendie74</td>\n", " <td>6</td>\n",
" <td>Systemstratege:</td>\n", " <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2023-02-25 13:14:49</td>\n",
" <td>False</td>\n",
" <td>Auch unser #A52 war heute aktiv und hat zum Th...</td>\n",
" <td>2023-02-21 12:12:48</td>\n",
" <td>1168873095614160896</td>\n",
" <td>polizeiberlin_p</td>\n",
" <td>Polizei Berlin Prävention</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>11557</th>\n", " <th>151688</th>\n",
" <td>999904275080794112</td>\n", " <td>1628352896352878593</td>\n",
" <td>_danielheim</td>\n", " <td>2</td>\n",
" <td>Systemstratege:</td>\n", " <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2023-02-26 13:15:05</td>\n",
" <td>False</td>\n",
" <td>Gestern führte unser #A13 in einer Wohnsiedlun...</td>\n",
" <td>2023-02-22 11:15:58</td>\n",
" <td>1168873095614160896</td>\n",
" <td>polizeiberlin_p</td>\n",
" <td>Polizei Berlin Prävention</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>11558</th>\n", " <th>151689</th>\n",
" <td>999955376454930432</td>\n", " <td>1628709531998998529</td>\n",
" <td>amyman6010</td>\n", " <td>10</td>\n",
" <td>Systemstratege:</td>\n", " <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2023-02-27 12:17:33</td>\n",
" <td>False</td>\n",
" <td>Auf dem Gelände der @BUFAStudios (Oberlandstr....</td>\n",
" <td>2023-02-23 10:53:07</td>\n",
" <td>1168873095614160896</td>\n",
" <td>polizeiberlin_p</td>\n",
" <td>Polizei Berlin Prävention</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"<p>11559 rows × 3 columns</p>\n", "<p>151690 rows × 12 columns</p>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" user_id handle username\n", " tweet_id like_count retweet_count reply_count \\\n",
"0 1000004686156652545 6jannik9 Systemstratege: \n", "0 1321021123463663616 2 1 2 \n",
"1 1000043230870867969 lsollik Physiolucy\n", "1 1321037834246066181 2 0 0 \n",
"2 1000405847460151296 achim1949hans Systemstratege: \n", "2 1321068234955776000 19 3 3 \n",
"3 1000460805719121921 wahrew WahreWorte\n", "3 1321073940199100416 0 0 0 \n",
"4 1000744009638252544 derd1ck3 Ⓓ①ⓒⓚ①③ (🏡)\n", "4 1321088646506754049 2 0 0 \n",
"... ... ... ...\n", "... ... ... ... ... \n",
"11554 99931264 havok1975 Systemstratege: \n", "151685 1625828803804004354 5 1 1 \n",
"11555 999542638226403328 madame_de_saxe Systemstratege: \n", "151686 1628004105623900167 2 0 0 \n",
"11556 999901133282754560 tungstendie74 Systemstratege: \n", "151687 1628004810183016448 6 0 0 \n",
"11557 999904275080794112 _danielheim Systemstratege: \n", "151688 1628352896352878593 2 0 0 \n",
"11558 999955376454930432 amyman6010 Systemstratege: \n", "151689 1628709531998998529 10 1 0 \n",
"\n", "\n",
"[11559 rows x 3 columns]" " quote_count measured_at is_deleted \\\n",
"0 0 NaT NaN \n",
"1 0 NaT NaN \n",
"2 0 NaT NaN \n",
"3 0 NaT NaN \n",
"4 0 NaT NaN \n",
"... ... ... ... \n",
"151685 0 2023-02-19 13:40:36 False \n",
"151686 0 2023-02-25 13:14:49 False \n",
"151687 0 2023-02-25 13:14:49 False \n",
"151688 0 2023-02-26 13:15:05 False \n",
"151689 0 2023-02-27 12:17:33 False \n",
"\n",
" tweet_text created_at \\\n",
"0 @mahanna196 Da die Stadt keine Ausnahme für Ra... 2020-10-27 09:29:13 \n",
"1 @mahanna196 Ja. *sr 2020-10-27 10:35:38 \n",
"2 #Aktuell Auf dem ehem. Bundeswehrkrankenhausge... 2020-10-27 12:36:26 \n",
"3 @Emma36166433 Bitte lesen Sie unseren Tweet 2/... 2020-10-27 12:59:06 \n",
"4 In der vergangenen Woche wurde die Wohnung des... 2020-10-27 13:57:32 \n",
"... ... ... \n",
"151685 #Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 ... 2023-02-15 12:06:07 \n",
"151686 Unser Präventionsteam vom #A44 berät heute und... 2023-02-21 12:10:00 \n",
"151687 Auch unser #A52 war heute aktiv und hat zum Th... 2023-02-21 12:12:48 \n",
"151688 Gestern führte unser #A13 in einer Wohnsiedlun... 2023-02-22 11:15:58 \n",
"151689 Auf dem Gelände der @BUFAStudios (Oberlandstr.... 2023-02-23 10:53:07 \n",
"\n",
" user_id handle \\\n",
"0 778895426007203840 polizei_ol \n",
"1 778895426007203840 polizei_ol \n",
"2 778895426007203840 polizei_ol \n",
"3 778895426007203840 polizei_ol \n",
"4 778895426007203840 polizei_ol \n",
"... ... ... \n",
"151685 1168873095614160896 polizeiberlin_p \n",
"151686 1168873095614160896 polizeiberlin_p \n",
"151687 1168873095614160896 polizeiberlin_p \n",
"151688 1168873095614160896 polizeiberlin_p \n",
"151689 1168873095614160896 polizeiberlin_p \n",
"\n",
" user_name \n",
"0 Polizei Oldenburg-Stadt/Ammerland \n",
"1 Polizei Oldenburg-Stadt/Ammerland \n",
"2 Polizei Oldenburg-Stadt/Ammerland \n",
"3 Polizei Oldenburg-Stadt/Ammerland \n",
"4 Polizei Oldenburg-Stadt/Ammerland \n",
"... ... \n",
"151685 Polizei Berlin Prävention \n",
"151686 Polizei Berlin Prävention \n",
"151687 Polizei Berlin Prävention \n",
"151688 Polizei Berlin Prävention \n",
"151689 Polizei Berlin Prävention \n",
"\n",
"[151690 rows x 12 columns]"
] ]
}, },
"execution_count": 44, "execution_count": 119,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"tweets_meta = pd.read_csv(\"data/tweets.csv\")\n", "# Convert Counts to integer values\n",
"tweets_time = pd.read_csv(\"data/tweets-1679742620302.csv\")\n", "tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(-99).astype(int)\n",
"tweets_text = pd.read_csv(\"data/tweets-1679742698645.csv\")\n",
"tweets_user = pd.read_csv(\"data/tweets-1679742702794.csv\"\n",
" ).rename(columns = {\"username\":\"handle\", # rename columns\n",
" \"handle\": \"username\"})\n",
"tweets_user = tweets_user.assign(handle = tweets_user['handle'].str.lower()) # convert handles to lower case\n",
"tweets_combined = pd.merge(tweets_time, # merge the two tweet related data frames\n",
" tweets_text, \n",
" how = 'inner', \n",
" on = 'tweet_id'\n",
" ).drop(['id'], # drop unascessary id column (redundant to index)\n",
" axis = 1)\n",
"tweets_combined = tweets_combined.assign(measured_at = pd.to_datetime(tweets_combined['measured_at']), # change date to date format\n", "tweets_combined = tweets_combined.assign(measured_at = pd.to_datetime(tweets_combined['measured_at']), # change date to date format\n",
" created_at = pd.to_datetime(tweets_combined['created_at']))\n", " created_at = pd.to_datetime(tweets_combined['created_at']),\n",
"police_stations = pd.read_csv(\"data/polizei_accounts_geo.csv\", sep = \"\\t\" # addiditional on police stations\n", " handle = tweets_combined['handle'].str.lower(),\n",
" ).rename(columns = {\"Polizei Account\": \"handle\"})\n", " is_deleted = tweets_combined['is_deleted'].map(lambda x: False if x == 0.0 else ( True if x == 1.0 else np.nan)))\n",
"tweets_user" "tweets_combined"
] ]
}, },
{ {
@ -337,7 +477,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 112,
"id": "0e5eb455-6b12-4572-8f5e-f328a94bd797", "id": "0e5eb455-6b12-4572-8f5e-f328a94bd797",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -346,13 +486,13 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"hashtag 157145\n", "hashtag 267255\n",
"url 88322\n", "url 141594\n",
"mention 36815\n", "mention 71142\n",
"Name: entity_type, dtype: int64" "Name: entity_type, dtype: int64"
] ]
}, },
"execution_count": 7, "execution_count": 112,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -369,12 +509,12 @@
"tags": [] "tags": []
}, },
"source": [ "source": [
"Insgesamt haben wir 84794 einzigartige Tweets:" "Insgesamt haben wir 151690 einzigartige Tweets:"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 113,
"id": "5a438e7f-8735-40bb-b450-2ce168f0f67a", "id": "5a438e7f-8735-40bb-b450-2ce168f0f67a",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -383,10 +523,10 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"84794" "151690"
] ]
}, },
"execution_count": 8, "execution_count": 113,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -397,7 +537,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 114,
"id": "4f1e8c6c-3610-436e-899e-4d0307259230", "id": "4f1e8c6c-3610-436e-899e-4d0307259230",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -407,12 +547,12 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Die Tweets wurden vom 2022-02-24 bis zum: 2023-03-16 gesammelt. Also genau insgesamt: 384 Tage.\n" "Die Tweets wurden vom 2020-10-27 bis zum: 2023-03-16 gesammelt. Also genau insgesamt: 870 Tage. (Mit kleinen Unterbrechungen)\n"
] ]
} }
], ],
"source": [ "source": [
"print(\"Die Tweets wurden vom \", tweets_combined['created_at'].min().date(), \"bis zum:\", tweets_combined['created_at'].max().date(), \"gesammelt.\", \"Also genau insgesamt:\", (tweets_combined['created_at'].max() - tweets_combined['created_at'].min()).days, \"Tage.\")\n", "print(\"Die Tweets wurden vom \", tweets_combined['created_at'].min().date(), \"bis zum:\", tweets_combined['created_at'].max().date(), \"gesammelt.\", \"Also genau insgesamt:\", (tweets_combined['created_at'].max() - tweets_combined['created_at'].min()).days, \"Tage. (Mit kleinen Unterbrechungen)\")\n",
"# tweets_combined[tweets_combined['created_at'] == tweets_combined['created_at'].max()] # Tweets vom letzten Tag" "# tweets_combined[tweets_combined['created_at'] == tweets_combined['created_at'].max()] # Tweets vom letzten Tag"
] ]
}, },
@ -428,9 +568,11 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 43, "execution_count": 122,
"id": "9373552e-6baf-46df-ae16-c63603e20a83", "id": "9373552e-6baf-46df-ae16-c63603e20a83",
"metadata": {}, "metadata": {
"tags": []
},
"outputs": [ "outputs": [
{ {
"data": { "data": {
@ -467,7 +609,7 @@
" <tr>\n", " <tr>\n",
" <th>11</th>\n", " <th>11</th>\n",
" <td>polizei_ffm</td>\n", " <td>polizei_ffm</td>\n",
" <td>2993</td>\n", " <td>5512</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -476,20 +618,9 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>3</th>\n",
" <td>polizei_nrw_do</td>\n",
" <td>2860</td>\n",
" <td>Polizei NRW DO</td>\n",
" <td>Polizei</td>\n",
" <td>Nordrhein-Westfalen</td>\n",
" <td>Dortmund</td>\n",
" <td>51.5142273</td>\n",
" <td>7.4652789</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n", " <th>0</th>\n",
" <td>polizeisachsen</td>\n", " <td>polizeisachsen</td>\n",
" <td>2700</td>\n", " <td>5340</td>\n",
" <td>Polizei Sachsen</td>\n", " <td>Polizei Sachsen</td>\n",
" <td>Polizei</td>\n", " <td>Polizei</td>\n",
" <td>Sachsen</td>\n", " <td>Sachsen</td>\n",
@ -498,9 +629,20 @@
" <td>13.7381437</td>\n", " <td>13.7381437</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>91</th>\n", " <th>3</th>\n",
" <td>polizei_nrw_do</td>\n",
" <td>4895</td>\n",
" <td>Polizei NRW DO</td>\n",
" <td>Polizei</td>\n",
" <td>Nordrhein-Westfalen</td>\n",
" <td>Dortmund</td>\n",
" <td>51.5142273</td>\n",
" <td>7.4652789</td>\n",
" </tr>\n",
" <tr>\n",
" <th>92</th>\n",
" <td>polizeibb</td>\n", " <td>polizeibb</td>\n",
" <td>2310</td>\n", " <td>4323</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -511,7 +653,7 @@
" <tr>\n", " <tr>\n",
" <th>61</th>\n", " <th>61</th>\n",
" <td>polizeihamburg</td>\n", " <td>polizeihamburg</td>\n",
" <td>2093</td>\n", " <td>4042</td>\n",
" <td>Polizei Hamburg</td>\n", " <td>Polizei Hamburg</td>\n",
" <td>Polizei</td>\n", " <td>Polizei</td>\n",
" <td>Hamburg</td>\n", " <td>Hamburg</td>\n",
@ -525,35 +667,32 @@
], ],
"text/plain": [ "text/plain": [
" handle count Name Typ Bundesland \\\n", " handle count Name Typ Bundesland \\\n",
"11 polizei_ffm 2993 NaN NaN NaN \n", "11 polizei_ffm 5512 NaN NaN NaN \n",
"3 polizei_nrw_do 2860 Polizei NRW DO Polizei Nordrhein-Westfalen \n", "0 polizeisachsen 5340 Polizei Sachsen Polizei Sachsen \n",
"0 polizeisachsen 2700 Polizei Sachsen Polizei Sachsen \n", "3 polizei_nrw_do 4895 Polizei NRW DO Polizei Nordrhein-Westfalen \n",
"91 polizeibb 2310 NaN NaN NaN \n", "92 polizeibb 4323 NaN NaN NaN \n",
"61 polizeihamburg 2093 Polizei Hamburg Polizei Hamburg \n", "61 polizeihamburg 4042 Polizei Hamburg Polizei Hamburg \n",
"\n", "\n",
" Stadt LAT LONG \n", " Stadt LAT LONG \n",
"11 NaN NaN NaN \n", "11 NaN NaN NaN \n",
"3 Dortmund 51.5142273 7.4652789 \n",
"0 Dresden 51.0493286 13.7381437 \n", "0 Dresden 51.0493286 13.7381437 \n",
"91 NaN NaN NaN \n", "3 Dortmund 51.5142273 7.4652789 \n",
"92 NaN NaN NaN \n",
"61 Hamburg 53.550341 10.000654 " "61 Hamburg 53.550341 10.000654 "
] ]
}, },
"execution_count": 43, "execution_count": 122,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"tweets_agg = tweets_combined.merge(tweets_user,\n", "tweets_agg = tweets_combined.groupby(by = [\"user_id\", \"user_name\", \"handle\"]\n",
" on = \"user_id\"\n", " )[\"user_id\"].aggregate(['count']\n",
" ).groupby(by = [\"user_id\", \"handle\", \"username\"]\n", " ).merge(police_stations,\n",
" )[\"user_id\"].aggregate(['count']\n", " on = \"handle\",\n",
" ).merge(police_stations, \n", " how = \"left\"\n",
" on = \"handle\",\n", " ).sort_values(['count'], ascending=False)\n",
" how = \"left\"\n",
" ).sort_values(['count'], \n",
" ascending=False)\n",
"tweets_agg.shape\n", "tweets_agg.shape\n",
"activy_police_vis = tweets_agg[0:50]\n", "activy_police_vis = tweets_agg[0:50]\n",
"activy_police_vis.head()" "activy_police_vis.head()"
@ -571,23 +710,31 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 47, "execution_count": 123,
"id": "b1c39196-d1cc-4f82-8e01-7529e7b3046f", "id": "b1c39196-d1cc-4f82-8e01-7529e7b3046f",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [ "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n",
" for col_name, dtype in df.dtypes.iteritems():\n"
]
},
{ {
"data": { "data": {
"text/html": [ "text/html": [
"\n", "\n",
"<div id=\"altair-viz-a660bd38b72240eaae654b5e471932a6\"></div>\n", "<div id=\"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\"></div>\n",
"<script type=\"text/javascript\">\n", "<script type=\"text/javascript\">\n",
" var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n", " var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
" (function(spec, embedOpt){\n", " (function(spec, embedOpt){\n",
" let outputDiv = document.currentScript.previousElementSibling;\n", " let outputDiv = document.currentScript.previousElementSibling;\n",
" if (outputDiv.id !== \"altair-viz-a660bd38b72240eaae654b5e471932a6\") {\n", " if (outputDiv.id !== \"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\") {\n",
" outputDiv = document.getElementById(\"altair-viz-a660bd38b72240eaae654b5e471932a6\");\n", " outputDiv = document.getElementById(\"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\");\n",
" }\n", " }\n",
" const paths = {\n", " const paths = {\n",
" \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n", " \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
@ -633,14 +780,14 @@
" .catch(showError)\n", " .catch(showError)\n",
" .then(() => displayChart(vegaEmbed));\n", " .then(() => displayChart(vegaEmbed));\n",
" }\n", " }\n",
" })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-da2bacd5b3a57271f77be4dc435a345f\"}, \"mark\": \"bar\", \"encoding\": {\"x\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"handle\", \"sort\": \"-x\", \"type\": \"nominal\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-da2bacd5b3a57271f77be4dc435a345f\": [{\"handle\": \"polizei_ffm\", \"count\": 2993, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_nrw_do\", \"count\": 2860, \"Name\": \"Polizei NRW DO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Dortmund\", \"LAT\": \"51.5142273\", \"LONG\": \"7.4652789\"}, {\"handle\": \"polizeisachsen\", \"count\": 2700, \"Name\": \"Polizei Sachsen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen\", \"Stadt\": \"Dresden\", \"LAT\": \"51.0493286\", \"LONG\": \"13.7381437\"}, {\"handle\": \"polizeibb\", \"count\": 2310, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeihamburg\", \"count\": 2093, \"Name\": \"Polizei Hamburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Hamburg\", \"Stadt\": \"Hamburg\", \"LAT\": \"53.550341\", \"LONG\": \"10.000654\"}, {\"handle\": \"polizeimuenchen\", \"count\": 2021, \"Name\": \"Polizei M\\u00fcnchen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"M\\u00fcnchen\", \"LAT\": \"48.135125\", \"LONG\": \"11.581981\"}, {\"handle\": \"polizeimfr\", \"count\": 1892, \"Name\": \"Polizei Mittelfranken\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"N\\u00fcrnberg\", \"LAT\": \"49.453872\", \"LONG\": \"11.077298\"}, {\"handle\": \"polizeimannheim\", \"count\": 1835, \"Name\": \"Polizei Mannheim\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Mannheim\", \"LAT\": \"49.4892913\", \"LONG\": \"8.4673098\"}, {\"handle\": \"polizei_nrw_bi\", \"count\": 1794, \"Name\": \"Polizei NRW BI\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bielefeld\", \"LAT\": \"52.0191005\", \"LONG\": \"8.531007\"}, {\"handle\": \"polizei_nrw_k\", \"count\": 1540, \"Name\": \"Polizei NRW K\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"K\\u00f6ln\", \"LAT\": \"50.938361\", \"LONG\": \"6.959974\"}, {\"handle\": \"bremenpolizei\", \"count\": 1417, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_kl\", \"count\": 1380, \"Name\": \"Polizei Kaiserslautern\", \"Typ\": \"Polizei\", \"Bundesland\": \"Rheinland-Pfalz\", \"Stadt\": \"Kaiserslautern\", \"LAT\": \"49.4432174\", \"LONG\": \"7.7689951\"}, {\"handle\": \"polizei_md\", \"count\": 1365, \"Name\": \"Polizei Magdeburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen-Anhalt\", \"Stadt\": \"Magdeburg\", \"LAT\": \"52.1315889\", \"LONG\": \"11.6399609\"}, {\"handle\": \"polizei_ka\", \"count\": 1356, \"Name\": \"Polizei Karlsruhe\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Karlsruhe\", \"LAT\": \"49.0068705\", \"LONG\": \"8.4034195\"}, {\"handle\": \"polizeiberlin\", \"count\": 1351, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}]}}, {\"mode\": \"vega-lite\"});\n", " })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-59538db49feb940cb722f8834432bfab\"}, \"mark\": \"bar\", \"encoding\": {\"x\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"handle\", \"sort\": \"-x\", \"type\": \"nominal\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-59538db49feb940cb722f8834432bfab\": [{\"handle\": \"polizei_ffm\", \"count\": 5512, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeisachsen\", \"count\": 5340, \"Name\": \"Polizei Sachsen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen\", \"Stadt\": \"Dresden\", \"LAT\": \"51.0493286\", \"LONG\": \"13.7381437\"}, {\"handle\": \"polizei_nrw_do\", \"count\": 4895, \"Name\": \"Polizei NRW DO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Dortmund\", \"LAT\": \"51.5142273\", \"LONG\": \"7.4652789\"}, {\"handle\": \"polizeibb\", \"count\": 4323, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeihamburg\", \"count\": 4042, \"Name\": \"Polizei Hamburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Hamburg\", \"Stadt\": \"Hamburg\", \"LAT\": \"53.550341\", \"LONG\": \"10.000654\"}, {\"handle\": \"polizeimuenchen\", \"count\": 3951, \"Name\": \"Polizei M\\u00fcnchen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"M\\u00fcnchen\", \"LAT\": \"48.135125\", \"LONG\": \"11.581981\"}, {\"handle\": \"polizeimfr\", \"count\": 3317, \"Name\": \"Polizei Mittelfranken\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"N\\u00fcrnberg\", \"LAT\": \"49.453872\", \"LONG\": \"11.077298\"}, {\"handle\": \"polizeimannheim\", \"count\": 3300, \"Name\": \"Polizei Mannheim\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Mannheim\", \"LAT\": \"49.4892913\", \"LONG\": \"8.4673098\"}, {\"handle\": \"bremenpolizei\", \"count\": 2664, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_ka\", \"count\": 2568, \"Name\": \"Polizei Karlsruhe\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Karlsruhe\", \"LAT\": \"49.0068705\", \"LONG\": \"8.4034195\"}, {\"handle\": \"polizei_nrw_k\", \"count\": 2544, \"Name\": \"Polizei NRW K\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"K\\u00f6ln\", \"LAT\": \"50.938361\", \"LONG\": \"6.959974\"}, {\"handle\": \"polizei_nrw_bo\", \"count\": 2367, \"Name\": \"Polizei NRW BO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bochum\", \"LAT\": \"51.4818111\", \"LONG\": \"7.2196635\"}, {\"handle\": \"polizei_md\", \"count\": 2319, \"Name\": \"Polizei Magdeburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen-Anhalt\", \"Stadt\": \"Magdeburg\", \"LAT\": \"52.1315889\", \"LONG\": \"11.6399609\"}, {\"handle\": \"polizei_h\", \"count\": 2302, \"Name\": \"Polizei Hannover\", \"Typ\": \"Polizei\", \"Bundesland\": \"Niedersachsen\", \"Stadt\": \"Hannover\", \"LAT\": \"52.3744779\", \"LONG\": \"9.7385532\"}, {\"handle\": \"polizei_nrw_bi\", \"count\": 2299, \"Name\": \"Polizei NRW BI\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bielefeld\", \"LAT\": \"52.0191005\", \"LONG\": \"8.531007\"}]}}, {\"mode\": \"vega-lite\"});\n",
"</script>" "</script>"
], ],
"text/plain": [ "text/plain": [
"alt.Chart(...)" "alt.Chart(...)"
] ]
}, },
"execution_count": 47, "execution_count": 123,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -665,7 +812,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 90, "execution_count": 125,
"id": "d0549250-b11f-4762-8500-1134c53303b4", "id": "d0549250-b11f-4762-8500-1134c53303b4",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -674,32 +821,29 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"0 Die Gewalt, die unsere Kolleginnen &amp; Kollegen in der Silvesternacht erleben mussten, ist une...\n", "0 Die Gewalt, die unsere Kolleginnen &amp; Kollegen in der Silvesternacht erleben mussten, ist une...\n",
"1 WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...\n", "1 An diejenigen, die vergangene Nacht in eine Schule in #Gesundbrunnen eingebrochen sind und 242 T...\n",
"2 Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...\n", "2 WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...\n",
"3 Auf unserem #A45 in #lichterfelde) befindet sich gerade diese Fundhündin. Sie wurde am Hindenbur...\n", "3 Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...\n",
"4 @nexta_tv Wir haben das Video gesichert und leiten den Sachverhalt an die zuständigen Kolleginne...\n", "4 Weil wir dich schieben! @BVG_Kampagne 😉 https://t.co/N8kdlCxhz2\n",
" ... \n", " ... \n",
"84789 #Polizeimeldungen #Tagesticker\\n \\nAnhalt-Bitterfeld\\nhttps://t.co/tNLEzztL1o\\n \\nDessau-Roßlau\\...\n", "151685 Sinken die Temperaturen ❄, steigt zeitgleich das Risiko für Verkehrsteilnehmer. Höchste Zeit zu ...\n",
"84790 Am Mittwoch erhielten wir mehrere Anrufe über einen auffälligen Pkw-Fahrer (Reifen quietschen un...\n", "151686 📺Am Sonntag, um 19:50 Uhr, geht es bei #KripoLive im \\n@mdrde\\n auch um die Fahndung nach einem ...\n",
"84791 @Jonas5Luisa Kleiner Pro-Tipp von uns: Einfach mal auf den link klicken! ;)*cl\n", "151687 Musik verbindet!\\nUnser #Adventskalender der #Bundespolizei startet morgen ➡ https://t.co/V6CaTV...\n",
"84792 Vermisstensuche nach 27-Jährigem aus Bendorf-Mühlhofen: Wer hat Tobias Wißmann gesehen? Ein Foto...\n", "151688 @gretchen_hann Hallo, diese Frage kann die Bundespolizei Spezialkräfte besser beantworten. Richt...\n",
"84793 #PolizeiNRW #Köln #Leverkusen : XXX - Infos unter https://t.co/SeWShP2tZE https://t.co/Kopy7w8W3B\n", "151689 #Bönen #Holzwickede - Verstöße gegen Coronaschutzverordnung: Polizei löst Gaststättenabend und F...\n",
"Name: tweet_text, Length: 84794, dtype: object" "Name: tweet_text, Length: 151690, dtype: object"
] ]
}, },
"execution_count": 90, "execution_count": 125,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"tweets_attention = tweets_combined.merge(tweets_user,\n", "tweets_attention = tweets_combined.merge(police_stations,\n",
" on = \"user_id\",\n", " on = \"handle\",\n",
" how = \"left\"\n", " how = \"left\")\n",
" ).merge(police_stations,\n",
" on = \"handle\",\n",
" how = \"left\")\n",
"pd.options.display.max_colwidth = 100\n", "pd.options.display.max_colwidth = 100\n",
"tweets_attention.sort_values('like_count', ascending = False).reset_index()['tweet_text']\n", "tweets_attention.sort_values('like_count', ascending = False).reset_index()['tweet_text']\n",
"\n" "\n"
@ -841,7 +985,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 148, "execution_count": 121,
"id": "ed86b45e-9dd8-436d-9c96-15500ed93985", "id": "ed86b45e-9dd8-436d-9c96-15500ed93985",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -868,142 +1012,104 @@
" <thead>\n", " <thead>\n",
" <tr style=\"text-align: right;\">\n", " <tr style=\"text-align: right;\">\n",
" <th></th>\n", " <th></th>\n",
" <th>like_count</th>\n", " <th></th>\n",
" <th>retweet_count</th>\n", " <th>count</th>\n",
" <th>reply_count</th>\n", " </tr>\n",
" <th>quote_count</th>\n", " <tr>\n",
" <th>user_id</th>\n",
" <th>user_name</th>\n",
" <th></th>\n",
" </tr>\n", " </tr>\n",
" </thead>\n", " </thead>\n",
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>0</th>\n", " <th>223758384</th>\n",
" <td>2</td>\n", " <th>Polizei Sachsen</th>\n",
" <td>1</td>\n", " <td>5340</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1</th>\n", " <th>259607457</th>\n",
" <td>2</td>\n", " <th>Polizei NRW K</th>\n",
" <td>0</td>\n", " <td>2544</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>2</th>\n", " <th>424895827</th>\n",
" <td>19</td>\n", " <th>Polizei Stuttgart</th>\n",
" <td>3</td>\n", " <td>1913</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>3</th>\n", " <th>769128278</th>\n",
" <td>0</td>\n", " <th>Polizei NRW DO</th>\n",
" <td>0</td>\n", " <td>4895</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>4</th>\n", " <th>775664780</th>\n",
" <td>2</td>\n", " <th>Polizei Rostock</th>\n",
" <td>0</td>\n", " <td>604</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>...</th>\n", " <th>...</th>\n",
" <td>...</td>\n", " <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n", " <td>...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>151685</th>\n", " <th>1169206134189830145</th>\n",
" <td>5</td>\n", " <th>Polizei Stendal</th>\n",
" <td>1</td>\n", " <td>842</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>151686</th>\n", " <th>1184022676488314880</th>\n",
" <td>2</td>\n", " <th>Polizei Pforzheim</th>\n",
" <td>0</td>\n", " <td>283</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>151687</th>\n", " <th>1184024283342950401</th>\n",
" <td>6</td>\n", " <th>Polizei Ravensburg</th>\n",
" <td>0</td>\n", " <td>460</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>151688</th>\n", " <th>1232548941889228808</th>\n",
" <td>2</td>\n", " <th>Systemstratege:</th>\n",
" <td>0</td>\n", " <td>168</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>151689</th>\n", " <th>1295978598034284546</th>\n",
" <td>10</td>\n", " <th>Polizei ZPD NI</th>\n",
" <td>1</td>\n", " <td>133</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"<p>151690 rows × 4 columns</p>\n", "<p>163 rows × 1 columns</p>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" like_count retweet_count reply_count quote_count\n", " count\n",
"0 2 1 2 0\n", "user_id user_name \n",
"1 2 0 0 0\n", "223758384 Polizei Sachsen 5340\n",
"2 19 3 3 0\n", "259607457 Polizei NRW K 2544\n",
"3 0 0 0 0\n", "424895827 Polizei Stuttgart 1913\n",
"4 2 0 0 0\n", "769128278 Polizei NRW DO 4895\n",
"... ... ... ... ...\n", "775664780 Polizei Rostock 604\n",
"151685 5 1 1 0\n", "... ...\n",
"151686 2 0 0 0\n", "1169206134189830145 Polizei Stendal 842\n",
"151687 6 0 0 0\n", "1184022676488314880 Polizei Pforzheim 283\n",
"151688 2 0 0 0\n", "1184024283342950401 Polizei Ravensburg 460\n",
"151689 10 1 0 0\n", "1232548941889228808 Systemstratege: 168\n",
"1295978598034284546 Polizei ZPD NI 133\n",
"\n", "\n",
"[151690 rows x 4 columns]" "[163 rows x 1 columns]"
] ]
}, },
"execution_count": 148, "execution_count": 121,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [] "source": [
}, "tweets_combined.groupby(by = [\"user_id\", \"user_name\"]\n",
{ " )[\"user_id\"].aggregate(['count']\n",
"cell_type": "code", " )"
"execution_count": 142, ]
"id": "dac4e5fc-22ca-466d-bc3c-586e68696d03",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"like_count\n",
"False 147573\n",
"True 4117\n",
"dtype: int64"
]
},
"execution_count": 142,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
} }
], ],
"metadata": { "metadata": {