unifying handles and usernames
This commit is contained in:
parent
5816077c16
commit
abe05ce248
4 changed files with 790 additions and 577 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
.jupyter/
|
|
@ -12,7 +12,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "9bd1686f-9bbc-4c05-a5f5-e0c4ce653fb2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
|
@ -37,7 +37,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 119,
|
||||
"execution_count": 117,
|
||||
"id": "fcc48831-7999-4d79-b722-736715b1ced6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
|
@ -46,10 +46,10 @@
|
|||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"((479991, 3), (151690, 8), (151690, 4), (13327, 5))"
|
||||
"((479991, 3), (151690, 8), (151690, 4), (13327, 3))"
|
||||
]
|
||||
},
|
||||
"execution_count": 119,
|
||||
"execution_count": 117,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -79,6 +79,14 @@
|
|||
" how = \"outer\",\n",
|
||||
" suffixes = [\"_2021\", \"_2022\"])\n",
|
||||
"\n",
|
||||
"# Some usernames corresponding to one user_id have changed over time. For easier handling, only the latest username and handle are kept.\n",
|
||||
"tweets_user = tweets_user.assign(handle = tweets_user.apply(lambda row: row['handle_2021'] if pd.isna(row['handle_2022']) else row['handle_2022'], axis=1),\n",
|
||||
" user_name = tweets_user.apply(lambda row: row['user_name_2021'] if pd.isna(row['user_name_2022']) else row['user_name_2022'], axis=1)\n",
|
||||
" ).drop(['handle_2021', 'handle_2022', 'user_name_2021', 'user_name_2022'], axis =1)\n",
|
||||
"\n",
|
||||
"police_stations = pd.read_csv(\"data/polizei_accounts_geo.csv\", sep = \"\\t\" # additional information on police stations\n",
|
||||
" ).rename(columns = {\"Polizei Account\": \"handle\"})\n",
|
||||
"\n",
|
||||
"tweets_meta.shape, tweets_statistics.shape, tweets_text.shape, tweets_user.shape"
|
||||
]
|
||||
},
|
||||
|
@ -92,56 +100,39 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 150,
|
||||
"id": "cf409591-74a0-48dc-8f9e-66f7229f58cd",
|
||||
"execution_count": 118,
|
||||
"id": "f30c2799-02c6-4e6a-ae36-9e039545b6b3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Merge like statistics, tweet text and user information in one data frame\n",
|
||||
"tweets_combined = pd.merge(tweets_statistics, \n",
|
||||
" tweets_text,\n",
|
||||
" on = 'tweet_id').merge(tweets_user, on = 'user_id'\n",
|
||||
" ).drop(['id'], axis = 1) # drop unnecessary id column (redundant to index)\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 119,
|
||||
"id": "bd407aba-eec1-41ed-bff9-4c5fcdf6cb9d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"tweet_id int64\n",
|
||||
"like_count int64\n",
|
||||
"retweet_count int64\n",
|
||||
"reply_count int64\n",
|
||||
"quote_count int64\n",
|
||||
"measured_at object\n",
|
||||
"is_deleted float64\n",
|
||||
"tweet_text object\n",
|
||||
"created_at object\n",
|
||||
"user_id int64\n",
|
||||
"user_name_2021 object\n",
|
||||
"handle_2021 object\n",
|
||||
"handle_2022 object\n",
|
||||
"user_name_2022 object\n",
|
||||
"dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 150,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tweets_combined = pd.merge(tweets_statistics, \n",
|
||||
" tweets_text,\n",
|
||||
" on = 'tweet_id').merge(tweets_user, on = 'user_id'\n",
|
||||
" ).drop(['id'], axis = 1) # drop unnecessary id column (redundant to index)\n",
|
||||
" \n",
|
||||
"# Convert Counts to integer values\n",
|
||||
"tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(-99).astype(int)\n",
|
||||
"tweets_combined.dtypes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"id": "e312a975-3921-44ee-a7c5-37736678bc3f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/IPython/lib/pretty.py:778: FutureWarning: In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.\n",
|
||||
" output = repr(obj)\n",
|
||||
"/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.\n",
|
||||
" return method()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
|
@ -163,124 +154,273 @@
|
|||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>tweet_id</th>\n",
|
||||
" <th>like_count</th>\n",
|
||||
" <th>retweet_count</th>\n",
|
||||
" <th>reply_count</th>\n",
|
||||
" <th>quote_count</th>\n",
|
||||
" <th>measured_at</th>\n",
|
||||
" <th>is_deleted</th>\n",
|
||||
" <th>tweet_text</th>\n",
|
||||
" <th>created_at</th>\n",
|
||||
" <th>user_id</th>\n",
|
||||
" <th>handle</th>\n",
|
||||
" <th>username</th>\n",
|
||||
" <th>user_name</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1000004686156652545</td>\n",
|
||||
" <td>6jannik9</td>\n",
|
||||
" <td>Systemstratege:</td>\n",
|
||||
" <td>1321021123463663616</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>@mahanna196 Da die Stadt keine Ausnahme für Ra...</td>\n",
|
||||
" <td>2020-10-27 09:29:13</td>\n",
|
||||
" <td>778895426007203840</td>\n",
|
||||
" <td>polizei_ol</td>\n",
|
||||
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>1000043230870867969</td>\n",
|
||||
" <td>lsollik</td>\n",
|
||||
" <td>Physiolucy</td>\n",
|
||||
" <td>1321037834246066181</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>@mahanna196 Ja. *sr</td>\n",
|
||||
" <td>2020-10-27 10:35:38</td>\n",
|
||||
" <td>778895426007203840</td>\n",
|
||||
" <td>polizei_ol</td>\n",
|
||||
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>1000405847460151296</td>\n",
|
||||
" <td>achim1949hans</td>\n",
|
||||
" <td>Systemstratege:</td>\n",
|
||||
" <td>1321068234955776000</td>\n",
|
||||
" <td>19</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>#Aktuell Auf dem ehem. Bundeswehrkrankenhausge...</td>\n",
|
||||
" <td>2020-10-27 12:36:26</td>\n",
|
||||
" <td>778895426007203840</td>\n",
|
||||
" <td>polizei_ol</td>\n",
|
||||
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>1000460805719121921</td>\n",
|
||||
" <td>wahrew</td>\n",
|
||||
" <td>WahreWorte</td>\n",
|
||||
" <td>1321073940199100416</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>@Emma36166433 Bitte lesen Sie unseren Tweet 2/...</td>\n",
|
||||
" <td>2020-10-27 12:59:06</td>\n",
|
||||
" <td>778895426007203840</td>\n",
|
||||
" <td>polizei_ol</td>\n",
|
||||
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>1000744009638252544</td>\n",
|
||||
" <td>derd1ck3</td>\n",
|
||||
" <td>Ⓓ①ⓒⓚ①③ (🏡)</td>\n",
|
||||
" <td>1321088646506754049</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>In der vergangenen Woche wurde die Wohnung des...</td>\n",
|
||||
" <td>2020-10-27 13:57:32</td>\n",
|
||||
" <td>778895426007203840</td>\n",
|
||||
" <td>polizei_ol</td>\n",
|
||||
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11554</th>\n",
|
||||
" <td>99931264</td>\n",
|
||||
" <td>havok1975</td>\n",
|
||||
" <td>Systemstratege:</td>\n",
|
||||
" <th>151685</th>\n",
|
||||
" <td>1625828803804004354</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2023-02-19 13:40:36</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>#Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 ...</td>\n",
|
||||
" <td>2023-02-15 12:06:07</td>\n",
|
||||
" <td>1168873095614160896</td>\n",
|
||||
" <td>polizeiberlin_p</td>\n",
|
||||
" <td>Polizei Berlin Prävention</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11555</th>\n",
|
||||
" <td>999542638226403328</td>\n",
|
||||
" <td>madame_de_saxe</td>\n",
|
||||
" <td>Systemstratege:</td>\n",
|
||||
" <th>151686</th>\n",
|
||||
" <td>1628004105623900167</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2023-02-25 13:14:49</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Unser Präventionsteam vom #A44 berät heute und...</td>\n",
|
||||
" <td>2023-02-21 12:10:00</td>\n",
|
||||
" <td>1168873095614160896</td>\n",
|
||||
" <td>polizeiberlin_p</td>\n",
|
||||
" <td>Polizei Berlin Prävention</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11556</th>\n",
|
||||
" <td>999901133282754560</td>\n",
|
||||
" <td>tungstendie74</td>\n",
|
||||
" <td>Systemstratege:</td>\n",
|
||||
" <th>151687</th>\n",
|
||||
" <td>1628004810183016448</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2023-02-25 13:14:49</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Auch unser #A52 war heute aktiv und hat zum Th...</td>\n",
|
||||
" <td>2023-02-21 12:12:48</td>\n",
|
||||
" <td>1168873095614160896</td>\n",
|
||||
" <td>polizeiberlin_p</td>\n",
|
||||
" <td>Polizei Berlin Prävention</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11557</th>\n",
|
||||
" <td>999904275080794112</td>\n",
|
||||
" <td>_danielheim</td>\n",
|
||||
" <td>Systemstratege:</td>\n",
|
||||
" <th>151688</th>\n",
|
||||
" <td>1628352896352878593</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2023-02-26 13:15:05</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Gestern führte unser #A13 in einer Wohnsiedlun...</td>\n",
|
||||
" <td>2023-02-22 11:15:58</td>\n",
|
||||
" <td>1168873095614160896</td>\n",
|
||||
" <td>polizeiberlin_p</td>\n",
|
||||
" <td>Polizei Berlin Prävention</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11558</th>\n",
|
||||
" <td>999955376454930432</td>\n",
|
||||
" <td>amyman6010</td>\n",
|
||||
" <td>Systemstratege:</td>\n",
|
||||
" <th>151689</th>\n",
|
||||
" <td>1628709531998998529</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2023-02-27 12:17:33</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Auf dem Gelände der @BUFAStudios (Oberlandstr....</td>\n",
|
||||
" <td>2023-02-23 10:53:07</td>\n",
|
||||
" <td>1168873095614160896</td>\n",
|
||||
" <td>polizeiberlin_p</td>\n",
|
||||
" <td>Polizei Berlin Prävention</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>11559 rows × 3 columns</p>\n",
|
||||
"<p>151690 rows × 12 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" user_id handle username\n",
|
||||
"0 1000004686156652545 6jannik9 Systemstratege: \n",
|
||||
"1 1000043230870867969 lsollik Physiolucy\n",
|
||||
"2 1000405847460151296 achim1949hans Systemstratege: \n",
|
||||
"3 1000460805719121921 wahrew WahreWorte\n",
|
||||
"4 1000744009638252544 derd1ck3 Ⓓ①ⓒⓚ①③ (🏡)\n",
|
||||
"... ... ... ...\n",
|
||||
"11554 99931264 havok1975 Systemstratege: \n",
|
||||
"11555 999542638226403328 madame_de_saxe Systemstratege: \n",
|
||||
"11556 999901133282754560 tungstendie74 Systemstratege: \n",
|
||||
"11557 999904275080794112 _danielheim Systemstratege: \n",
|
||||
"11558 999955376454930432 amyman6010 Systemstratege: \n",
|
||||
" tweet_id like_count retweet_count reply_count \\\n",
|
||||
"0 1321021123463663616 2 1 2 \n",
|
||||
"1 1321037834246066181 2 0 0 \n",
|
||||
"2 1321068234955776000 19 3 3 \n",
|
||||
"3 1321073940199100416 0 0 0 \n",
|
||||
"4 1321088646506754049 2 0 0 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"151685 1625828803804004354 5 1 1 \n",
|
||||
"151686 1628004105623900167 2 0 0 \n",
|
||||
"151687 1628004810183016448 6 0 0 \n",
|
||||
"151688 1628352896352878593 2 0 0 \n",
|
||||
"151689 1628709531998998529 10 1 0 \n",
|
||||
"\n",
|
||||
"[11559 rows x 3 columns]"
|
||||
" quote_count measured_at is_deleted \\\n",
|
||||
"0 0 NaT NaN \n",
|
||||
"1 0 NaT NaN \n",
|
||||
"2 0 NaT NaN \n",
|
||||
"3 0 NaT NaN \n",
|
||||
"4 0 NaT NaN \n",
|
||||
"... ... ... ... \n",
|
||||
"151685 0 2023-02-19 13:40:36 False \n",
|
||||
"151686 0 2023-02-25 13:14:49 False \n",
|
||||
"151687 0 2023-02-25 13:14:49 False \n",
|
||||
"151688 0 2023-02-26 13:15:05 False \n",
|
||||
"151689 0 2023-02-27 12:17:33 False \n",
|
||||
"\n",
|
||||
" tweet_text created_at \\\n",
|
||||
"0 @mahanna196 Da die Stadt keine Ausnahme für Ra... 2020-10-27 09:29:13 \n",
|
||||
"1 @mahanna196 Ja. *sr 2020-10-27 10:35:38 \n",
|
||||
"2 #Aktuell Auf dem ehem. Bundeswehrkrankenhausge... 2020-10-27 12:36:26 \n",
|
||||
"3 @Emma36166433 Bitte lesen Sie unseren Tweet 2/... 2020-10-27 12:59:06 \n",
|
||||
"4 In der vergangenen Woche wurde die Wohnung des... 2020-10-27 13:57:32 \n",
|
||||
"... ... ... \n",
|
||||
"151685 #Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 ... 2023-02-15 12:06:07 \n",
|
||||
"151686 Unser Präventionsteam vom #A44 berät heute und... 2023-02-21 12:10:00 \n",
|
||||
"151687 Auch unser #A52 war heute aktiv und hat zum Th... 2023-02-21 12:12:48 \n",
|
||||
"151688 Gestern führte unser #A13 in einer Wohnsiedlun... 2023-02-22 11:15:58 \n",
|
||||
"151689 Auf dem Gelände der @BUFAStudios (Oberlandstr.... 2023-02-23 10:53:07 \n",
|
||||
"\n",
|
||||
" user_id handle \\\n",
|
||||
"0 778895426007203840 polizei_ol \n",
|
||||
"1 778895426007203840 polizei_ol \n",
|
||||
"2 778895426007203840 polizei_ol \n",
|
||||
"3 778895426007203840 polizei_ol \n",
|
||||
"4 778895426007203840 polizei_ol \n",
|
||||
"... ... ... \n",
|
||||
"151685 1168873095614160896 polizeiberlin_p \n",
|
||||
"151686 1168873095614160896 polizeiberlin_p \n",
|
||||
"151687 1168873095614160896 polizeiberlin_p \n",
|
||||
"151688 1168873095614160896 polizeiberlin_p \n",
|
||||
"151689 1168873095614160896 polizeiberlin_p \n",
|
||||
"\n",
|
||||
" user_name \n",
|
||||
"0 Polizei Oldenburg-Stadt/Ammerland \n",
|
||||
"1 Polizei Oldenburg-Stadt/Ammerland \n",
|
||||
"2 Polizei Oldenburg-Stadt/Ammerland \n",
|
||||
"3 Polizei Oldenburg-Stadt/Ammerland \n",
|
||||
"4 Polizei Oldenburg-Stadt/Ammerland \n",
|
||||
"... ... \n",
|
||||
"151685 Polizei Berlin Prävention \n",
|
||||
"151686 Polizei Berlin Prävention \n",
|
||||
"151687 Polizei Berlin Prävention \n",
|
||||
"151688 Polizei Berlin Prävention \n",
|
||||
"151689 Polizei Berlin Prävention \n",
|
||||
"\n",
|
||||
"[151690 rows x 12 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 44,
|
||||
"execution_count": 119,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tweets_meta = pd.read_csv(\"data/tweets.csv\")\n",
|
||||
"tweets_time = pd.read_csv(\"data/tweets-1679742620302.csv\")\n",
|
||||
"tweets_text = pd.read_csv(\"data/tweets-1679742698645.csv\")\n",
|
||||
"tweets_user = pd.read_csv(\"data/tweets-1679742702794.csv\"\n",
|
||||
" ).rename(columns = {\"username\":\"handle\", # rename columns\n",
|
||||
" \"handle\": \"username\"})\n",
|
||||
"tweets_user = tweets_user.assign(handle = tweets_user['handle'].str.lower()) # convert handles to lower case\n",
|
||||
"tweets_combined = pd.merge(tweets_time, # merge the two tweet related data frames\n",
|
||||
" tweets_text, \n",
|
||||
" how = 'inner', \n",
|
||||
" on = 'tweet_id'\n",
|
||||
" ).drop(['id'], # drop unnecessary id column (redundant to index)\n",
|
||||
" axis = 1)\n",
|
||||
"# Convert Counts to integer values\n",
|
||||
"tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(-99).astype(int)\n",
|
||||
"tweets_combined = tweets_combined.assign(measured_at = pd.to_datetime(tweets_combined['measured_at']), # change date to date format\n",
|
||||
" created_at = pd.to_datetime(tweets_combined['created_at']))\n",
|
||||
"police_stations = pd.read_csv(\"data/polizei_accounts_geo.csv\", sep = \"\\t\" # additional information on police stations\n",
|
||||
" ).rename(columns = {\"Polizei Account\": \"handle\"})\n",
|
||||
"tweets_user"
|
||||
" created_at = pd.to_datetime(tweets_combined['created_at']),\n",
|
||||
" handle = tweets_combined['handle'].str.lower(),\n",
|
||||
" is_deleted = tweets_combined['is_deleted'].map(lambda x: False if x == 0.0 else ( True if x == 1.0 else np.nan)))\n",
|
||||
"tweets_combined"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -337,7 +477,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 112,
|
||||
"id": "0e5eb455-6b12-4572-8f5e-f328a94bd797",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
|
@ -346,13 +486,13 @@
|
|||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"hashtag 157145\n",
|
||||
"url 88322\n",
|
||||
"mention 36815\n",
|
||||
"hashtag 267255\n",
|
||||
"url 141594\n",
|
||||
"mention 71142\n",
|
||||
"Name: entity_type, dtype: int64"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 112,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -369,12 +509,12 @@
|
|||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"Insgesamt haben wir 84794 einzigartige Tweets:"
|
||||
"Insgesamt haben wir 151690 einzigartige Tweets:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 113,
|
||||
"id": "5a438e7f-8735-40bb-b450-2ce168f0f67a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
|
@ -383,10 +523,10 @@
|
|||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"84794"
|
||||
"151690"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 113,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -397,7 +537,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 114,
|
||||
"id": "4f1e8c6c-3610-436e-899e-4d0307259230",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
|
@ -407,12 +547,12 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Die Tweets wurden vom 2022-02-24 bis zum: 2023-03-16 gesammelt. Also genau insgesamt: 384 Tage.\n"
|
||||
"Die Tweets wurden vom 2020-10-27 bis zum: 2023-03-16 gesammelt. Also genau insgesamt: 870 Tage. (Mit kleinen Unterbrechungen)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"Die Tweets wurden vom \", tweets_combined['created_at'].min().date(), \"bis zum:\", tweets_combined['created_at'].max().date(), \"gesammelt.\", \"Also genau insgesamt:\", (tweets_combined['created_at'].max() - tweets_combined['created_at'].min()).days, \"Tage.\")\n",
|
||||
"print(\"Die Tweets wurden vom \", tweets_combined['created_at'].min().date(), \"bis zum:\", tweets_combined['created_at'].max().date(), \"gesammelt.\", \"Also genau insgesamt:\", (tweets_combined['created_at'].max() - tweets_combined['created_at'].min()).days, \"Tage. (Mit kleinen Unterbrechungen)\")\n",
|
||||
"# tweets_combined[tweets_combined['created_at'] == tweets_combined['created_at'].max()] # Tweets vom letzten Tag"
|
||||
]
|
||||
},
|
||||
|
@ -428,9 +568,11 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": 122,
|
||||
"id": "9373552e-6baf-46df-ae16-c63603e20a83",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
|
@ -467,7 +609,7 @@
|
|||
" <tr>\n",
|
||||
" <th>11</th>\n",
|
||||
" <td>polizei_ffm</td>\n",
|
||||
" <td>2993</td>\n",
|
||||
" <td>5512</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
|
@ -476,20 +618,9 @@
|
|||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>polizei_nrw_do</td>\n",
|
||||
" <td>2860</td>\n",
|
||||
" <td>Polizei NRW DO</td>\n",
|
||||
" <td>Polizei</td>\n",
|
||||
" <td>Nordrhein-Westfalen</td>\n",
|
||||
" <td>Dortmund</td>\n",
|
||||
" <td>51.5142273</td>\n",
|
||||
" <td>7.4652789</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>polizeisachsen</td>\n",
|
||||
" <td>2700</td>\n",
|
||||
" <td>5340</td>\n",
|
||||
" <td>Polizei Sachsen</td>\n",
|
||||
" <td>Polizei</td>\n",
|
||||
" <td>Sachsen</td>\n",
|
||||
|
@ -498,9 +629,20 @@
|
|||
" <td>13.7381437</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>91</th>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>polizei_nrw_do</td>\n",
|
||||
" <td>4895</td>\n",
|
||||
" <td>Polizei NRW DO</td>\n",
|
||||
" <td>Polizei</td>\n",
|
||||
" <td>Nordrhein-Westfalen</td>\n",
|
||||
" <td>Dortmund</td>\n",
|
||||
" <td>51.5142273</td>\n",
|
||||
" <td>7.4652789</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>92</th>\n",
|
||||
" <td>polizeibb</td>\n",
|
||||
" <td>2310</td>\n",
|
||||
" <td>4323</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
|
@ -511,7 +653,7 @@
|
|||
" <tr>\n",
|
||||
" <th>61</th>\n",
|
||||
" <td>polizeihamburg</td>\n",
|
||||
" <td>2093</td>\n",
|
||||
" <td>4042</td>\n",
|
||||
" <td>Polizei Hamburg</td>\n",
|
||||
" <td>Polizei</td>\n",
|
||||
" <td>Hamburg</td>\n",
|
||||
|
@ -525,35 +667,32 @@
|
|||
],
|
||||
"text/plain": [
|
||||
" handle count Name Typ Bundesland \\\n",
|
||||
"11 polizei_ffm 2993 NaN NaN NaN \n",
|
||||
"3 polizei_nrw_do 2860 Polizei NRW DO Polizei Nordrhein-Westfalen \n",
|
||||
"0 polizeisachsen 2700 Polizei Sachsen Polizei Sachsen \n",
|
||||
"91 polizeibb 2310 NaN NaN NaN \n",
|
||||
"61 polizeihamburg 2093 Polizei Hamburg Polizei Hamburg \n",
|
||||
"11 polizei_ffm 5512 NaN NaN NaN \n",
|
||||
"0 polizeisachsen 5340 Polizei Sachsen Polizei Sachsen \n",
|
||||
"3 polizei_nrw_do 4895 Polizei NRW DO Polizei Nordrhein-Westfalen \n",
|
||||
"92 polizeibb 4323 NaN NaN NaN \n",
|
||||
"61 polizeihamburg 4042 Polizei Hamburg Polizei Hamburg \n",
|
||||
"\n",
|
||||
" Stadt LAT LONG \n",
|
||||
"11 NaN NaN NaN \n",
|
||||
"3 Dortmund 51.5142273 7.4652789 \n",
|
||||
"0 Dresden 51.0493286 13.7381437 \n",
|
||||
"91 NaN NaN NaN \n",
|
||||
"3 Dortmund 51.5142273 7.4652789 \n",
|
||||
"92 NaN NaN NaN \n",
|
||||
"61 Hamburg 53.550341 10.000654 "
|
||||
]
|
||||
},
|
||||
"execution_count": 43,
|
||||
"execution_count": 122,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tweets_agg = tweets_combined.merge(tweets_user,\n",
|
||||
" on = \"user_id\"\n",
|
||||
" ).groupby(by = [\"user_id\", \"handle\", \"username\"]\n",
|
||||
" )[\"user_id\"].aggregate(['count']\n",
|
||||
" ).merge(police_stations, \n",
|
||||
" on = \"handle\",\n",
|
||||
" how = \"left\"\n",
|
||||
" ).sort_values(['count'], \n",
|
||||
" ascending=False)\n",
|
||||
"tweets_agg = tweets_combined.groupby(by = [\"user_id\", \"user_name\", \"handle\"]\n",
|
||||
" )[\"user_id\"].aggregate(['count']\n",
|
||||
" ).merge(police_stations,\n",
|
||||
" on = \"handle\",\n",
|
||||
" how = \"left\"\n",
|
||||
" ).sort_values(['count'], ascending=False)\n",
|
||||
"tweets_agg.shape\n",
|
||||
"activy_police_vis = tweets_agg[0:50]\n",
|
||||
"activy_police_vis.head()"
|
||||
|
@ -571,23 +710,31 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"execution_count": 123,
|
||||
"id": "b1c39196-d1cc-4f82-8e01-7529e7b3046f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n",
|
||||
" for col_name, dtype in df.dtypes.iteritems():\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"\n",
|
||||
"<div id=\"altair-viz-a660bd38b72240eaae654b5e471932a6\"></div>\n",
|
||||
"<div id=\"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\"></div>\n",
|
||||
"<script type=\"text/javascript\">\n",
|
||||
" var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
|
||||
" (function(spec, embedOpt){\n",
|
||||
" let outputDiv = document.currentScript.previousElementSibling;\n",
|
||||
" if (outputDiv.id !== \"altair-viz-a660bd38b72240eaae654b5e471932a6\") {\n",
|
||||
" outputDiv = document.getElementById(\"altair-viz-a660bd38b72240eaae654b5e471932a6\");\n",
|
||||
" if (outputDiv.id !== \"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\") {\n",
|
||||
" outputDiv = document.getElementById(\"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\");\n",
|
||||
" }\n",
|
||||
" const paths = {\n",
|
||||
" \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
|
||||
|
@ -633,14 +780,14 @@
|
|||
" .catch(showError)\n",
|
||||
" .then(() => displayChart(vegaEmbed));\n",
|
||||
" }\n",
|
||||
" })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-da2bacd5b3a57271f77be4dc435a345f\"}, \"mark\": \"bar\", \"encoding\": {\"x\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"handle\", \"sort\": \"-x\", \"type\": \"nominal\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-da2bacd5b3a57271f77be4dc435a345f\": [{\"handle\": \"polizei_ffm\", \"count\": 2993, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_nrw_do\", \"count\": 2860, \"Name\": \"Polizei NRW DO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Dortmund\", \"LAT\": \"51.5142273\", \"LONG\": \"7.4652789\"}, {\"handle\": \"polizeisachsen\", \"count\": 2700, \"Name\": \"Polizei Sachsen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen\", \"Stadt\": \"Dresden\", \"LAT\": \"51.0493286\", \"LONG\": \"13.7381437\"}, {\"handle\": \"polizeibb\", \"count\": 2310, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeihamburg\", \"count\": 2093, \"Name\": \"Polizei Hamburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Hamburg\", \"Stadt\": \"Hamburg\", \"LAT\": \"53.550341\", \"LONG\": \"10.000654\"}, {\"handle\": \"polizeimuenchen\", \"count\": 2021, \"Name\": \"Polizei M\\u00fcnchen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"M\\u00fcnchen\", \"LAT\": \"48.135125\", \"LONG\": \"11.581981\"}, {\"handle\": \"polizeimfr\", \"count\": 1892, \"Name\": \"Polizei Mittelfranken\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"N\\u00fcrnberg\", \"LAT\": \"49.453872\", \"LONG\": \"11.077298\"}, {\"handle\": \"polizeimannheim\", \"count\": 1835, \"Name\": \"Polizei Mannheim\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Mannheim\", \"LAT\": 
\"49.4892913\", \"LONG\": \"8.4673098\"}, {\"handle\": \"polizei_nrw_bi\", \"count\": 1794, \"Name\": \"Polizei NRW BI\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bielefeld\", \"LAT\": \"52.0191005\", \"LONG\": \"8.531007\"}, {\"handle\": \"polizei_nrw_k\", \"count\": 1540, \"Name\": \"Polizei NRW K\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"K\\u00f6ln\", \"LAT\": \"50.938361\", \"LONG\": \"6.959974\"}, {\"handle\": \"bremenpolizei\", \"count\": 1417, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_kl\", \"count\": 1380, \"Name\": \"Polizei Kaiserslautern\", \"Typ\": \"Polizei\", \"Bundesland\": \"Rheinland-Pfalz\", \"Stadt\": \"Kaiserslautern\", \"LAT\": \"49.4432174\", \"LONG\": \"7.7689951\"}, {\"handle\": \"polizei_md\", \"count\": 1365, \"Name\": \"Polizei Magdeburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen-Anhalt\", \"Stadt\": \"Magdeburg\", \"LAT\": \"52.1315889\", \"LONG\": \"11.6399609\"}, {\"handle\": \"polizei_ka\", \"count\": 1356, \"Name\": \"Polizei Karlsruhe\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Karlsruhe\", \"LAT\": \"49.0068705\", \"LONG\": \"8.4034195\"}, {\"handle\": \"polizeiberlin\", \"count\": 1351, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}]}}, {\"mode\": \"vega-lite\"});\n",
|
||||
" })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-59538db49feb940cb722f8834432bfab\"}, \"mark\": \"bar\", \"encoding\": {\"x\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"handle\", \"sort\": \"-x\", \"type\": \"nominal\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-59538db49feb940cb722f8834432bfab\": [{\"handle\": \"polizei_ffm\", \"count\": 5512, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeisachsen\", \"count\": 5340, \"Name\": \"Polizei Sachsen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen\", \"Stadt\": \"Dresden\", \"LAT\": \"51.0493286\", \"LONG\": \"13.7381437\"}, {\"handle\": \"polizei_nrw_do\", \"count\": 4895, \"Name\": \"Polizei NRW DO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Dortmund\", \"LAT\": \"51.5142273\", \"LONG\": \"7.4652789\"}, {\"handle\": \"polizeibb\", \"count\": 4323, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeihamburg\", \"count\": 4042, \"Name\": \"Polizei Hamburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Hamburg\", \"Stadt\": \"Hamburg\", \"LAT\": \"53.550341\", \"LONG\": \"10.000654\"}, {\"handle\": \"polizeimuenchen\", \"count\": 3951, \"Name\": \"Polizei M\\u00fcnchen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"M\\u00fcnchen\", \"LAT\": \"48.135125\", \"LONG\": \"11.581981\"}, {\"handle\": \"polizeimfr\", \"count\": 3317, \"Name\": \"Polizei Mittelfranken\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"N\\u00fcrnberg\", \"LAT\": \"49.453872\", \"LONG\": \"11.077298\"}, {\"handle\": \"polizeimannheim\", \"count\": 3300, \"Name\": \"Polizei Mannheim\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Mannheim\", \"LAT\": 
\"49.4892913\", \"LONG\": \"8.4673098\"}, {\"handle\": \"bremenpolizei\", \"count\": 2664, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_ka\", \"count\": 2568, \"Name\": \"Polizei Karlsruhe\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Karlsruhe\", \"LAT\": \"49.0068705\", \"LONG\": \"8.4034195\"}, {\"handle\": \"polizei_nrw_k\", \"count\": 2544, \"Name\": \"Polizei NRW K\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"K\\u00f6ln\", \"LAT\": \"50.938361\", \"LONG\": \"6.959974\"}, {\"handle\": \"polizei_nrw_bo\", \"count\": 2367, \"Name\": \"Polizei NRW BO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bochum\", \"LAT\": \"51.4818111\", \"LONG\": \"7.2196635\"}, {\"handle\": \"polizei_md\", \"count\": 2319, \"Name\": \"Polizei Magdeburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen-Anhalt\", \"Stadt\": \"Magdeburg\", \"LAT\": \"52.1315889\", \"LONG\": \"11.6399609\"}, {\"handle\": \"polizei_h\", \"count\": 2302, \"Name\": \"Polizei Hannover\", \"Typ\": \"Polizei\", \"Bundesland\": \"Niedersachsen\", \"Stadt\": \"Hannover\", \"LAT\": \"52.3744779\", \"LONG\": \"9.7385532\"}, {\"handle\": \"polizei_nrw_bi\", \"count\": 2299, \"Name\": \"Polizei NRW BI\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bielefeld\", \"LAT\": \"52.0191005\", \"LONG\": \"8.531007\"}]}}, {\"mode\": \"vega-lite\"});\n",
|
||||
"</script>"
|
||||
],
|
||||
"text/plain": [
|
||||
"alt.Chart(...)"
|
||||
]
|
||||
},
|
||||
"execution_count": 47,
|
||||
"execution_count": 123,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -665,7 +812,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 90,
|
||||
"execution_count": 125,
|
||||
"id": "d0549250-b11f-4762-8500-1134c53303b4",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
|
@ -674,32 +821,29 @@
|
|||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"0 Die Gewalt, die unsere Kolleginnen & Kollegen in der Silvesternacht erleben mussten, ist une...\n",
|
||||
"1 WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...\n",
|
||||
"2 Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...\n",
|
||||
"3 Auf unserem #A45 in #lichterfelde) befindet sich gerade diese Fundhündin. Sie wurde am Hindenbur...\n",
|
||||
"4 @nexta_tv Wir haben das Video gesichert und leiten den Sachverhalt an die zuständigen Kolleginne...\n",
|
||||
" ... \n",
|
||||
"84789 #Polizeimeldungen #Tagesticker\\n \\nAnhalt-Bitterfeld\\nhttps://t.co/tNLEzztL1o\\n \\nDessau-Roßlau\\...\n",
|
||||
"84790 Am Mittwoch erhielten wir mehrere Anrufe über einen auffälligen Pkw-Fahrer (Reifen quietschen un...\n",
|
||||
"84791 @Jonas5Luisa Kleiner Pro-Tipp von uns: Einfach mal auf den link klicken! ;)*cl\n",
|
||||
"84792 Vermisstensuche nach 27-Jährigem aus Bendorf-Mühlhofen: Wer hat Tobias Wißmann gesehen? Ein Foto...\n",
|
||||
"84793 #PolizeiNRW #Köln #Leverkusen : XXX - Infos unter https://t.co/SeWShP2tZE https://t.co/Kopy7w8W3B\n",
|
||||
"Name: tweet_text, Length: 84794, dtype: object"
|
||||
"0 Die Gewalt, die unsere Kolleginnen & Kollegen in der Silvesternacht erleben mussten, ist une...\n",
|
||||
"1 An diejenigen, die vergangene Nacht in eine Schule in #Gesundbrunnen eingebrochen sind und 242 T...\n",
|
||||
"2 WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...\n",
|
||||
"3 Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...\n",
|
||||
"4 Weil wir dich schieben! @BVG_Kampagne 😉 https://t.co/N8kdlCxhz2\n",
|
||||
" ... \n",
|
||||
"151685 Sinken die Temperaturen ❄, steigt zeitgleich das Risiko für Verkehrsteilnehmer. Höchste Zeit zu ...\n",
|
||||
"151686 📺Am Sonntag, um 19:50 Uhr, geht es bei #KripoLive im \\n@mdrde\\n auch um die Fahndung nach einem ...\n",
|
||||
"151687 Musik verbindet!\\nUnser #Adventskalender der #Bundespolizei startet morgen ➡ https://t.co/V6CaTV...\n",
|
||||
"151688 @gretchen_hann Hallo, diese Frage kann die Bundespolizei Spezialkräfte besser beantworten. Richt...\n",
|
||||
"151689 #Bönen #Holzwickede - Verstöße gegen Coronaschutzverordnung: Polizei löst Gaststättenabend und F...\n",
|
||||
"Name: tweet_text, Length: 151690, dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 90,
|
||||
"execution_count": 125,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tweets_attention = tweets_combined.merge(tweets_user,\n",
|
||||
" on = \"user_id\",\n",
|
||||
" how = \"left\"\n",
|
||||
" ).merge(police_stations,\n",
|
||||
" on = \"handle\",\n",
|
||||
" how = \"left\")\n",
|
||||
"tweets_attention = tweets_combined.merge(police_stations,\n",
|
||||
" on = \"handle\",\n",
|
||||
" how = \"left\")\n",
|
||||
"pd.options.display.max_colwidth = 100\n",
|
||||
"tweets_attention.sort_values('like_count', ascending = False).reset_index()['tweet_text']\n",
|
||||
"\n"
|
||||
|
@ -841,7 +985,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 148,
|
||||
"execution_count": 121,
|
||||
"id": "ed86b45e-9dd8-436d-9c96-15500ed93985",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
|
@ -868,142 +1012,104 @@
|
|||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>like_count</th>\n",
|
||||
" <th>retweet_count</th>\n",
|
||||
" <th>reply_count</th>\n",
|
||||
" <th>quote_count</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th>count</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>user_id</th>\n",
|
||||
" <th>user_name</th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>223758384</th>\n",
|
||||
" <th>Polizei Sachsen</th>\n",
|
||||
" <td>5340</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>259607457</th>\n",
|
||||
" <th>Polizei NRW K</th>\n",
|
||||
" <td>2544</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>19</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>424895827</th>\n",
|
||||
" <th>Polizei Stuttgart</th>\n",
|
||||
" <td>1913</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>769128278</th>\n",
|
||||
" <th>Polizei NRW DO</th>\n",
|
||||
" <td>4895</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>775664780</th>\n",
|
||||
" <th>Polizei Rostock</th>\n",
|
||||
" <td>604</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>151685</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>1169206134189830145</th>\n",
|
||||
" <th>Polizei Stendal</th>\n",
|
||||
" <td>842</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>151686</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>1184022676488314880</th>\n",
|
||||
" <th>Polizei Pforzheim</th>\n",
|
||||
" <td>283</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>151687</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>1184024283342950401</th>\n",
|
||||
" <th>Polizei Ravensburg</th>\n",
|
||||
" <td>460</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>151688</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>1232548941889228808</th>\n",
|
||||
" <th>Systemstratege:</th>\n",
|
||||
" <td>168</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>151689</th>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>1295978598034284546</th>\n",
|
||||
" <th>Polizei ZPD NI</th>\n",
|
||||
" <td>133</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>151690 rows × 4 columns</p>\n",
|
||||
"<p>163 rows × 1 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" like_count retweet_count reply_count quote_count\n",
|
||||
"0 2 1 2 0\n",
|
||||
"1 2 0 0 0\n",
|
||||
"2 19 3 3 0\n",
|
||||
"3 0 0 0 0\n",
|
||||
"4 2 0 0 0\n",
|
||||
"... ... ... ... ...\n",
|
||||
"151685 5 1 1 0\n",
|
||||
"151686 2 0 0 0\n",
|
||||
"151687 6 0 0 0\n",
|
||||
"151688 2 0 0 0\n",
|
||||
"151689 10 1 0 0\n",
|
||||
" count\n",
|
||||
"user_id user_name \n",
|
||||
"223758384 Polizei Sachsen 5340\n",
|
||||
"259607457 Polizei NRW K 2544\n",
|
||||
"424895827 Polizei Stuttgart 1913\n",
|
||||
"769128278 Polizei NRW DO 4895\n",
|
||||
"775664780 Polizei Rostock 604\n",
|
||||
"... ...\n",
|
||||
"1169206134189830145 Polizei Stendal 842\n",
|
||||
"1184022676488314880 Polizei Pforzheim 283\n",
|
||||
"1184024283342950401 Polizei Ravensburg 460\n",
|
||||
"1232548941889228808 Systemstratege: 168\n",
|
||||
"1295978598034284546 Polizei ZPD NI 133\n",
|
||||
"\n",
|
||||
"[151690 rows x 4 columns]"
|
||||
"[163 rows x 1 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 148,
|
||||
"execution_count": 121,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 142,
|
||||
"id": "dac4e5fc-22ca-466d-bc3c-586e68696d03",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"like_count\n",
|
||||
"False 147573\n",
|
||||
"True 4117\n",
|
||||
"dtype: int64"
|
||||
]
|
||||
},
|
||||
"execution_count": 142,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": []
|
||||
"source": [
|
||||
"tweets_combined.groupby(by = [\"user_id\", \"user_name\"]\n",
|
||||
" )[\"user_id\"].aggregate(['count']\n",
|
||||
" )"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
|
@ -1 +1 @@
|
|||
{"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":0,"widgets":[]}},"down":{"size":0,"widgets":[]},"left":{"collapsed":false,"current":"filebrowser","widgets":["filebrowser","running-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector","debugger-sidebar"]},"relativeSizes":[0.2676740420939018,0.7323259579060982,0]},"file-browser-filebrowser:cwd":{"path":"data"}},"metadata":{"id":"default"}}
|
||||
{"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":1,"widgets":["notebook:zusammenfassung.ipynb"]},"current":"notebook:zusammenfassung.ipynb"},"down":{"size":0,"widgets":[]},"left":{"collapsed":false,"current":"filebrowser","widgets":["filebrowser","running-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector","debugger-sidebar"]},"relativeSizes":[0.17943235504652827,0.8205676449534718,0]},"file-browser-filebrowser:cwd":{"path":""},"notebook:zusammenfassung.ipynb":{"data":{"path":"zusammenfassung.ipynb","factory":"Notebook"}}},"metadata":{"id":"default"}}
|
|
@ -12,7 +12,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "9bd1686f-9bbc-4c05-a5f5-e0c4ce653fb2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
|
@ -37,7 +37,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 119,
|
||||
"execution_count": 117,
|
||||
"id": "fcc48831-7999-4d79-b722-736715b1ced6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
|
@ -46,10 +46,10 @@
|
|||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"((479991, 3), (151690, 8), (151690, 4), (13327, 5))"
|
||||
"((479991, 3), (151690, 8), (151690, 4), (13327, 3))"
|
||||
]
|
||||
},
|
||||
"execution_count": 119,
|
||||
"execution_count": 117,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -79,6 +79,14 @@
|
|||
" how = \"outer\",\n",
|
||||
" suffixes = [\"_2021\", \"_2022\"])\n",
|
||||
"\n",
|
||||
"# Some usernames corresponding to one user_id have changed over time. For easier handling, only the latest username and handle are kept.\n",
|
||||
"tweets_user = tweets_user.assign(handle = tweets_user.apply(lambda row: row['handle_2021'] if pd.isna(row['handle_2022']) else row['handle_2022'], axis=1),\n",
|
||||
" user_name = tweets_user.apply(lambda row: row['user_name_2021'] if pd.isna(row['user_name_2022']) else row['user_name_2022'], axis=1)\n",
|
||||
" ).drop(['handle_2021', 'handle_2022', 'user_name_2021', 'user_name_2022'], axis =1)\n",
|
||||
"\n",
|
||||
"police_stations = pd.read_csv(\"data/polizei_accounts_geo.csv\", sep = \"\\t\" # additional data on police stations\n",
|
||||
" ).rename(columns = {\"Polizei Account\": \"handle\"})\n",
|
||||
"\n",
|
||||
"tweets_meta.shape, tweets_statistics.shape, tweets_text.shape, tweets_user.shape"
|
||||
]
|
||||
},
|
||||
|
@ -92,56 +100,39 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 150,
|
||||
"id": "cf409591-74a0-48dc-8f9e-66f7229f58cd",
|
||||
"execution_count": 118,
|
||||
"id": "f30c2799-02c6-4e6a-ae36-9e039545b6b3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Merge like statistics, tweet text and user information in one data frame\n",
|
||||
"tweets_combined = pd.merge(tweets_statistics, \n",
|
||||
" tweets_text,\n",
|
||||
" on = 'tweet_id').merge(tweets_user, on = 'user_id'\n",
|
||||
" ).drop(['id'], axis = 1) # drop unnecessary id column (redundant to index)\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 119,
|
||||
"id": "bd407aba-eec1-41ed-bff9-4c5fcdf6cb9d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"tweet_id int64\n",
|
||||
"like_count int64\n",
|
||||
"retweet_count int64\n",
|
||||
"reply_count int64\n",
|
||||
"quote_count int64\n",
|
||||
"measured_at object\n",
|
||||
"is_deleted float64\n",
|
||||
"tweet_text object\n",
|
||||
"created_at object\n",
|
||||
"user_id int64\n",
|
||||
"user_name_2021 object\n",
|
||||
"handle_2021 object\n",
|
||||
"handle_2022 object\n",
|
||||
"user_name_2022 object\n",
|
||||
"dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 150,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tweets_combined = pd.merge(tweets_statistics, \n",
|
||||
" tweets_text,\n",
|
||||
" on = 'tweet_id').merge(tweets_user, on = 'user_id'\n",
|
||||
" ).drop(['id'], axis = 1) # drop unnecessary id column (redundant to index)\n",
|
||||
" \n",
|
||||
"# Convert Counts to integer values\n",
|
||||
"tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(-99).astype(int)\n",
|
||||
"tweets_combined.dtypes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"id": "e312a975-3921-44ee-a7c5-37736678bc3f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/IPython/lib/pretty.py:778: FutureWarning: In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.\n",
|
||||
" output = repr(obj)\n",
|
||||
"/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/IPython/core/formatters.py:342: FutureWarning: In a future version, object-dtype columns with all-bool values will not be included in reductions with bool_only=True. Explicitly cast to bool dtype instead.\n",
|
||||
" return method()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
|
@ -163,124 +154,273 @@
|
|||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>tweet_id</th>\n",
|
||||
" <th>like_count</th>\n",
|
||||
" <th>retweet_count</th>\n",
|
||||
" <th>reply_count</th>\n",
|
||||
" <th>quote_count</th>\n",
|
||||
" <th>measured_at</th>\n",
|
||||
" <th>is_deleted</th>\n",
|
||||
" <th>tweet_text</th>\n",
|
||||
" <th>created_at</th>\n",
|
||||
" <th>user_id</th>\n",
|
||||
" <th>handle</th>\n",
|
||||
" <th>username</th>\n",
|
||||
" <th>user_name</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1000004686156652545</td>\n",
|
||||
" <td>6jannik9</td>\n",
|
||||
" <td>Systemstratege:</td>\n",
|
||||
" <td>1321021123463663616</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>@mahanna196 Da die Stadt keine Ausnahme für Ra...</td>\n",
|
||||
" <td>2020-10-27 09:29:13</td>\n",
|
||||
" <td>778895426007203840</td>\n",
|
||||
" <td>polizei_ol</td>\n",
|
||||
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>1000043230870867969</td>\n",
|
||||
" <td>lsollik</td>\n",
|
||||
" <td>Physiolucy</td>\n",
|
||||
" <td>1321037834246066181</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>@mahanna196 Ja. *sr</td>\n",
|
||||
" <td>2020-10-27 10:35:38</td>\n",
|
||||
" <td>778895426007203840</td>\n",
|
||||
" <td>polizei_ol</td>\n",
|
||||
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>1000405847460151296</td>\n",
|
||||
" <td>achim1949hans</td>\n",
|
||||
" <td>Systemstratege:</td>\n",
|
||||
" <td>1321068234955776000</td>\n",
|
||||
" <td>19</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>#Aktuell Auf dem ehem. Bundeswehrkrankenhausge...</td>\n",
|
||||
" <td>2020-10-27 12:36:26</td>\n",
|
||||
" <td>778895426007203840</td>\n",
|
||||
" <td>polizei_ol</td>\n",
|
||||
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>1000460805719121921</td>\n",
|
||||
" <td>wahrew</td>\n",
|
||||
" <td>WahreWorte</td>\n",
|
||||
" <td>1321073940199100416</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>@Emma36166433 Bitte lesen Sie unseren Tweet 2/...</td>\n",
|
||||
" <td>2020-10-27 12:59:06</td>\n",
|
||||
" <td>778895426007203840</td>\n",
|
||||
" <td>polizei_ol</td>\n",
|
||||
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>1000744009638252544</td>\n",
|
||||
" <td>derd1ck3</td>\n",
|
||||
" <td>Ⓓ①ⓒⓚ①③ (🏡)</td>\n",
|
||||
" <td>1321088646506754049</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>In der vergangenen Woche wurde die Wohnung des...</td>\n",
|
||||
" <td>2020-10-27 13:57:32</td>\n",
|
||||
" <td>778895426007203840</td>\n",
|
||||
" <td>polizei_ol</td>\n",
|
||||
" <td>Polizei Oldenburg-Stadt/Ammerland</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11554</th>\n",
|
||||
" <td>99931264</td>\n",
|
||||
" <td>havok1975</td>\n",
|
||||
" <td>Systemstratege:</td>\n",
|
||||
" <th>151685</th>\n",
|
||||
" <td>1625828803804004354</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2023-02-19 13:40:36</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>#Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 ...</td>\n",
|
||||
" <td>2023-02-15 12:06:07</td>\n",
|
||||
" <td>1168873095614160896</td>\n",
|
||||
" <td>polizeiberlin_p</td>\n",
|
||||
" <td>Polizei Berlin Prävention</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11555</th>\n",
|
||||
" <td>999542638226403328</td>\n",
|
||||
" <td>madame_de_saxe</td>\n",
|
||||
" <td>Systemstratege:</td>\n",
|
||||
" <th>151686</th>\n",
|
||||
" <td>1628004105623900167</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2023-02-25 13:14:49</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Unser Präventionsteam vom #A44 berät heute und...</td>\n",
|
||||
" <td>2023-02-21 12:10:00</td>\n",
|
||||
" <td>1168873095614160896</td>\n",
|
||||
" <td>polizeiberlin_p</td>\n",
|
||||
" <td>Polizei Berlin Prävention</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11556</th>\n",
|
||||
" <td>999901133282754560</td>\n",
|
||||
" <td>tungstendie74</td>\n",
|
||||
" <td>Systemstratege:</td>\n",
|
||||
" <th>151687</th>\n",
|
||||
" <td>1628004810183016448</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2023-02-25 13:14:49</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Auch unser #A52 war heute aktiv und hat zum Th...</td>\n",
|
||||
" <td>2023-02-21 12:12:48</td>\n",
|
||||
" <td>1168873095614160896</td>\n",
|
||||
" <td>polizeiberlin_p</td>\n",
|
||||
" <td>Polizei Berlin Prävention</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11557</th>\n",
|
||||
" <td>999904275080794112</td>\n",
|
||||
" <td>_danielheim</td>\n",
|
||||
" <td>Systemstratege:</td>\n",
|
||||
" <th>151688</th>\n",
|
||||
" <td>1628352896352878593</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2023-02-26 13:15:05</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Gestern führte unser #A13 in einer Wohnsiedlun...</td>\n",
|
||||
" <td>2023-02-22 11:15:58</td>\n",
|
||||
" <td>1168873095614160896</td>\n",
|
||||
" <td>polizeiberlin_p</td>\n",
|
||||
" <td>Polizei Berlin Prävention</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>11558</th>\n",
|
||||
" <td>999955376454930432</td>\n",
|
||||
" <td>amyman6010</td>\n",
|
||||
" <td>Systemstratege:</td>\n",
|
||||
" <th>151689</th>\n",
|
||||
" <td>1628709531998998529</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2023-02-27 12:17:33</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Auf dem Gelände der @BUFAStudios (Oberlandstr....</td>\n",
|
||||
" <td>2023-02-23 10:53:07</td>\n",
|
||||
" <td>1168873095614160896</td>\n",
|
||||
" <td>polizeiberlin_p</td>\n",
|
||||
" <td>Polizei Berlin Prävention</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>11559 rows × 3 columns</p>\n",
|
||||
"<p>151690 rows × 12 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" user_id handle username\n",
|
||||
"0 1000004686156652545 6jannik9 Systemstratege: \n",
|
||||
"1 1000043230870867969 lsollik Physiolucy\n",
|
||||
"2 1000405847460151296 achim1949hans Systemstratege: \n",
|
||||
"3 1000460805719121921 wahrew WahreWorte\n",
|
||||
"4 1000744009638252544 derd1ck3 Ⓓ①ⓒⓚ①③ (🏡)\n",
|
||||
"... ... ... ...\n",
|
||||
"11554 99931264 havok1975 Systemstratege: \n",
|
||||
"11555 999542638226403328 madame_de_saxe Systemstratege: \n",
|
||||
"11556 999901133282754560 tungstendie74 Systemstratege: \n",
|
||||
"11557 999904275080794112 _danielheim Systemstratege: \n",
|
||||
"11558 999955376454930432 amyman6010 Systemstratege: \n",
|
||||
" tweet_id like_count retweet_count reply_count \\\n",
|
||||
"0 1321021123463663616 2 1 2 \n",
|
||||
"1 1321037834246066181 2 0 0 \n",
|
||||
"2 1321068234955776000 19 3 3 \n",
|
||||
"3 1321073940199100416 0 0 0 \n",
|
||||
"4 1321088646506754049 2 0 0 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"151685 1625828803804004354 5 1 1 \n",
|
||||
"151686 1628004105623900167 2 0 0 \n",
|
||||
"151687 1628004810183016448 6 0 0 \n",
|
||||
"151688 1628352896352878593 2 0 0 \n",
|
||||
"151689 1628709531998998529 10 1 0 \n",
|
||||
"\n",
|
||||
"[11559 rows x 3 columns]"
|
||||
" quote_count measured_at is_deleted \\\n",
|
||||
"0 0 NaT NaN \n",
|
||||
"1 0 NaT NaN \n",
|
||||
"2 0 NaT NaN \n",
|
||||
"3 0 NaT NaN \n",
|
||||
"4 0 NaT NaN \n",
|
||||
"... ... ... ... \n",
|
||||
"151685 0 2023-02-19 13:40:36 False \n",
|
||||
"151686 0 2023-02-25 13:14:49 False \n",
|
||||
"151687 0 2023-02-25 13:14:49 False \n",
|
||||
"151688 0 2023-02-26 13:15:05 False \n",
|
||||
"151689 0 2023-02-27 12:17:33 False \n",
|
||||
"\n",
|
||||
" tweet_text created_at \\\n",
|
||||
"0 @mahanna196 Da die Stadt keine Ausnahme für Ra... 2020-10-27 09:29:13 \n",
|
||||
"1 @mahanna196 Ja. *sr 2020-10-27 10:35:38 \n",
|
||||
"2 #Aktuell Auf dem ehem. Bundeswehrkrankenhausge... 2020-10-27 12:36:26 \n",
|
||||
"3 @Emma36166433 Bitte lesen Sie unseren Tweet 2/... 2020-10-27 12:59:06 \n",
|
||||
"4 In der vergangenen Woche wurde die Wohnung des... 2020-10-27 13:57:32 \n",
|
||||
"... ... ... \n",
|
||||
"151685 #Sicherheit durch #Sichtbarkeit\\nUnsere #Dir3 ... 2023-02-15 12:06:07 \n",
|
||||
"151686 Unser Präventionsteam vom #A44 berät heute und... 2023-02-21 12:10:00 \n",
|
||||
"151687 Auch unser #A52 war heute aktiv und hat zum Th... 2023-02-21 12:12:48 \n",
|
||||
"151688 Gestern führte unser #A13 in einer Wohnsiedlun... 2023-02-22 11:15:58 \n",
|
||||
"151689 Auf dem Gelände der @BUFAStudios (Oberlandstr.... 2023-02-23 10:53:07 \n",
|
||||
"\n",
|
||||
" user_id handle \\\n",
|
||||
"0 778895426007203840 polizei_ol \n",
|
||||
"1 778895426007203840 polizei_ol \n",
|
||||
"2 778895426007203840 polizei_ol \n",
|
||||
"3 778895426007203840 polizei_ol \n",
|
||||
"4 778895426007203840 polizei_ol \n",
|
||||
"... ... ... \n",
|
||||
"151685 1168873095614160896 polizeiberlin_p \n",
|
||||
"151686 1168873095614160896 polizeiberlin_p \n",
|
||||
"151687 1168873095614160896 polizeiberlin_p \n",
|
||||
"151688 1168873095614160896 polizeiberlin_p \n",
|
||||
"151689 1168873095614160896 polizeiberlin_p \n",
|
||||
"\n",
|
||||
" user_name \n",
|
||||
"0 Polizei Oldenburg-Stadt/Ammerland \n",
|
||||
"1 Polizei Oldenburg-Stadt/Ammerland \n",
|
||||
"2 Polizei Oldenburg-Stadt/Ammerland \n",
|
||||
"3 Polizei Oldenburg-Stadt/Ammerland \n",
|
||||
"4 Polizei Oldenburg-Stadt/Ammerland \n",
|
||||
"... ... \n",
|
||||
"151685 Polizei Berlin Prävention \n",
|
||||
"151686 Polizei Berlin Prävention \n",
|
||||
"151687 Polizei Berlin Prävention \n",
|
||||
"151688 Polizei Berlin Prävention \n",
|
||||
"151689 Polizei Berlin Prävention \n",
|
||||
"\n",
|
||||
"[151690 rows x 12 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 44,
|
||||
"execution_count": 119,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tweets_meta = pd.read_csv(\"data/tweets.csv\")\n",
|
||||
"tweets_time = pd.read_csv(\"data/tweets-1679742620302.csv\")\n",
|
||||
"tweets_text = pd.read_csv(\"data/tweets-1679742698645.csv\")\n",
|
||||
"tweets_user = pd.read_csv(\"data/tweets-1679742702794.csv\"\n",
|
||||
" ).rename(columns = {\"username\":\"handle\", # rename columns\n",
|
||||
" \"handle\": \"username\"})\n",
|
||||
"tweets_user = tweets_user.assign(handle = tweets_user['handle'].str.lower()) # convert handles to lower case\n",
|
||||
"tweets_combined = pd.merge(tweets_time, # merge the two tweet related data frames\n",
|
||||
" tweets_text, \n",
|
||||
" how = 'inner', \n",
|
||||
" on = 'tweet_id'\n",
|
||||
" ).drop(['id'], # drop unnecessary id column (redundant to index)\n",
|
||||
" axis = 1)\n",
|
||||
"# Convert Counts to integer values\n",
|
||||
"tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']] = tweets_combined[['like_count', 'retweet_count', 'reply_count', 'quote_count']].fillna(-99).astype(int)\n",
|
||||
"tweets_combined = tweets_combined.assign(measured_at = pd.to_datetime(tweets_combined['measured_at']), # change date to date format\n",
|
||||
" created_at = pd.to_datetime(tweets_combined['created_at']))\n",
|
||||
"police_stations = pd.read_csv(\"data/polizei_accounts_geo.csv\", sep = \"\\t\" # additional data on police stations\n",
|
||||
" ).rename(columns = {\"Polizei Account\": \"handle\"})\n",
|
||||
"tweets_user"
|
||||
" created_at = pd.to_datetime(tweets_combined['created_at']),\n",
|
||||
" handle = tweets_combined['handle'].str.lower(),\n",
|
||||
" is_deleted = tweets_combined['is_deleted'].map(lambda x: False if x == 0.0 else ( True if x == 1.0 else np.nan)))\n",
|
||||
"tweets_combined"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -337,7 +477,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 112,
|
||||
"id": "0e5eb455-6b12-4572-8f5e-f328a94bd797",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
|
@ -346,13 +486,13 @@
|
|||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"hashtag 157145\n",
|
||||
"url 88322\n",
|
||||
"mention 36815\n",
|
||||
"hashtag 267255\n",
|
||||
"url 141594\n",
|
||||
"mention 71142\n",
|
||||
"Name: entity_type, dtype: int64"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 112,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -369,12 +509,12 @@
|
|||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"Insgesamt haben wir 84794 einzigartige Tweets:"
|
||||
"Insgesamt haben wir 151690 einzigartige Tweets:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 113,
|
||||
"id": "5a438e7f-8735-40bb-b450-2ce168f0f67a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
|
@ -383,10 +523,10 @@
|
|||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"84794"
|
||||
"151690"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 113,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -397,7 +537,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 114,
|
||||
"id": "4f1e8c6c-3610-436e-899e-4d0307259230",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
|
@ -407,12 +547,12 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Die Tweets wurden vom 2022-02-24 bis zum: 2023-03-16 gesammelt. Also genau insgesamt: 384 Tage.\n"
|
||||
"Die Tweets wurden vom 2020-10-27 bis zum: 2023-03-16 gesammelt. Also genau insgesamt: 870 Tage. (Mit kleinen Unterbrechungen)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"Die Tweets wurden vom \", tweets_combined['created_at'].min().date(), \"bis zum:\", tweets_combined['created_at'].max().date(), \"gesammelt.\", \"Also genau insgesamt:\", (tweets_combined['created_at'].max() - tweets_combined['created_at'].min()).days, \"Tage.\")\n",
|
||||
"print(\"Die Tweets wurden vom \", tweets_combined['created_at'].min().date(), \"bis zum:\", tweets_combined['created_at'].max().date(), \"gesammelt.\", \"Also genau insgesamt:\", (tweets_combined['created_at'].max() - tweets_combined['created_at'].min()).days, \"Tage. (Mit kleinen Unterbrechungen)\")\n",
|
||||
"# tweets_combined[tweets_combined['created_at'] == tweets_combined['created_at'].max()] # Tweets vom letzten Tag"
|
||||
]
|
||||
},
|
||||
|
@ -428,9 +568,11 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": 122,
|
||||
"id": "9373552e-6baf-46df-ae16-c63603e20a83",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
|
@ -467,7 +609,7 @@
|
|||
" <tr>\n",
|
||||
" <th>11</th>\n",
|
||||
" <td>polizei_ffm</td>\n",
|
||||
" <td>2993</td>\n",
|
||||
" <td>5512</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
|
@ -476,20 +618,9 @@
|
|||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>polizei_nrw_do</td>\n",
|
||||
" <td>2860</td>\n",
|
||||
" <td>Polizei NRW DO</td>\n",
|
||||
" <td>Polizei</td>\n",
|
||||
" <td>Nordrhein-Westfalen</td>\n",
|
||||
" <td>Dortmund</td>\n",
|
||||
" <td>51.5142273</td>\n",
|
||||
" <td>7.4652789</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>polizeisachsen</td>\n",
|
||||
" <td>2700</td>\n",
|
||||
" <td>5340</td>\n",
|
||||
" <td>Polizei Sachsen</td>\n",
|
||||
" <td>Polizei</td>\n",
|
||||
" <td>Sachsen</td>\n",
|
||||
|
@ -498,9 +629,20 @@
|
|||
" <td>13.7381437</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>91</th>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>polizei_nrw_do</td>\n",
|
||||
" <td>4895</td>\n",
|
||||
" <td>Polizei NRW DO</td>\n",
|
||||
" <td>Polizei</td>\n",
|
||||
" <td>Nordrhein-Westfalen</td>\n",
|
||||
" <td>Dortmund</td>\n",
|
||||
" <td>51.5142273</td>\n",
|
||||
" <td>7.4652789</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>92</th>\n",
|
||||
" <td>polizeibb</td>\n",
|
||||
" <td>2310</td>\n",
|
||||
" <td>4323</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
|
@ -511,7 +653,7 @@
|
|||
" <tr>\n",
|
||||
" <th>61</th>\n",
|
||||
" <td>polizeihamburg</td>\n",
|
||||
" <td>2093</td>\n",
|
||||
" <td>4042</td>\n",
|
||||
" <td>Polizei Hamburg</td>\n",
|
||||
" <td>Polizei</td>\n",
|
||||
" <td>Hamburg</td>\n",
|
||||
|
@ -525,35 +667,32 @@
|
|||
],
|
||||
"text/plain": [
|
||||
" handle count Name Typ Bundesland \\\n",
|
||||
"11 polizei_ffm 2993 NaN NaN NaN \n",
|
||||
"3 polizei_nrw_do 2860 Polizei NRW DO Polizei Nordrhein-Westfalen \n",
|
||||
"0 polizeisachsen 2700 Polizei Sachsen Polizei Sachsen \n",
|
||||
"91 polizeibb 2310 NaN NaN NaN \n",
|
||||
"61 polizeihamburg 2093 Polizei Hamburg Polizei Hamburg \n",
|
||||
"11 polizei_ffm 5512 NaN NaN NaN \n",
|
||||
"0 polizeisachsen 5340 Polizei Sachsen Polizei Sachsen \n",
|
||||
"3 polizei_nrw_do 4895 Polizei NRW DO Polizei Nordrhein-Westfalen \n",
|
||||
"92 polizeibb 4323 NaN NaN NaN \n",
|
||||
"61 polizeihamburg 4042 Polizei Hamburg Polizei Hamburg \n",
|
||||
"\n",
|
||||
" Stadt LAT LONG \n",
|
||||
"11 NaN NaN NaN \n",
|
||||
"3 Dortmund 51.5142273 7.4652789 \n",
|
||||
"0 Dresden 51.0493286 13.7381437 \n",
|
||||
"91 NaN NaN NaN \n",
|
||||
"3 Dortmund 51.5142273 7.4652789 \n",
|
||||
"92 NaN NaN NaN \n",
|
||||
"61 Hamburg 53.550341 10.000654 "
|
||||
]
|
||||
},
|
||||
"execution_count": 43,
|
||||
"execution_count": 122,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tweets_agg = tweets_combined.merge(tweets_user,\n",
|
||||
" on = \"user_id\"\n",
|
||||
" ).groupby(by = [\"user_id\", \"handle\", \"username\"]\n",
|
||||
" )[\"user_id\"].aggregate(['count']\n",
|
||||
" ).merge(police_stations, \n",
|
||||
" on = \"handle\",\n",
|
||||
" how = \"left\"\n",
|
||||
" ).sort_values(['count'], \n",
|
||||
" ascending=False)\n",
|
||||
"tweets_agg = tweets_combined.groupby(by = [\"user_id\", \"user_name\", \"handle\"]\n",
|
||||
" )[\"user_id\"].aggregate(['count']\n",
|
||||
" ).merge(police_stations,\n",
|
||||
" on = \"handle\",\n",
|
||||
" how = \"left\"\n",
|
||||
" ).sort_values(['count'], ascending=False)\n",
|
||||
"tweets_agg.shape\n",
|
||||
"activy_police_vis = tweets_agg[0:50]\n",
|
||||
"activy_police_vis.head()"
|
||||
|
@ -571,23 +710,31 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"execution_count": 123,
|
||||
"id": "b1c39196-d1cc-4f82-8e01-7529e7b3046f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/nix/store/4105l1v2llsjz4j7qaqsz0fljc9z0z2r-python3-3.10.9-env/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n",
|
||||
" for col_name, dtype in df.dtypes.iteritems():\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"\n",
|
||||
"<div id=\"altair-viz-a660bd38b72240eaae654b5e471932a6\"></div>\n",
|
||||
"<div id=\"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\"></div>\n",
|
||||
"<script type=\"text/javascript\">\n",
|
||||
" var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
|
||||
" (function(spec, embedOpt){\n",
|
||||
" let outputDiv = document.currentScript.previousElementSibling;\n",
|
||||
" if (outputDiv.id !== \"altair-viz-a660bd38b72240eaae654b5e471932a6\") {\n",
|
||||
" outputDiv = document.getElementById(\"altair-viz-a660bd38b72240eaae654b5e471932a6\");\n",
|
||||
" if (outputDiv.id !== \"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\") {\n",
|
||||
" outputDiv = document.getElementById(\"altair-viz-c1c17c98428f4353a3eca9bd87ef6517\");\n",
|
||||
" }\n",
|
||||
" const paths = {\n",
|
||||
" \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
|
||||
|
@ -633,14 +780,14 @@
|
|||
" .catch(showError)\n",
|
||||
" .then(() => displayChart(vegaEmbed));\n",
|
||||
" }\n",
|
||||
" })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-da2bacd5b3a57271f77be4dc435a345f\"}, \"mark\": \"bar\", \"encoding\": {\"x\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"handle\", \"sort\": \"-x\", \"type\": \"nominal\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-da2bacd5b3a57271f77be4dc435a345f\": [{\"handle\": \"polizei_ffm\", \"count\": 2993, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_nrw_do\", \"count\": 2860, \"Name\": \"Polizei NRW DO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Dortmund\", \"LAT\": \"51.5142273\", \"LONG\": \"7.4652789\"}, {\"handle\": \"polizeisachsen\", \"count\": 2700, \"Name\": \"Polizei Sachsen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen\", \"Stadt\": \"Dresden\", \"LAT\": \"51.0493286\", \"LONG\": \"13.7381437\"}, {\"handle\": \"polizeibb\", \"count\": 2310, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeihamburg\", \"count\": 2093, \"Name\": \"Polizei Hamburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Hamburg\", \"Stadt\": \"Hamburg\", \"LAT\": \"53.550341\", \"LONG\": \"10.000654\"}, {\"handle\": \"polizeimuenchen\", \"count\": 2021, \"Name\": \"Polizei M\\u00fcnchen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"M\\u00fcnchen\", \"LAT\": \"48.135125\", \"LONG\": \"11.581981\"}, {\"handle\": \"polizeimfr\", \"count\": 1892, \"Name\": \"Polizei Mittelfranken\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"N\\u00fcrnberg\", \"LAT\": \"49.453872\", \"LONG\": \"11.077298\"}, {\"handle\": \"polizeimannheim\", \"count\": 1835, \"Name\": \"Polizei Mannheim\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Mannheim\", \"LAT\": 
\"49.4892913\", \"LONG\": \"8.4673098\"}, {\"handle\": \"polizei_nrw_bi\", \"count\": 1794, \"Name\": \"Polizei NRW BI\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bielefeld\", \"LAT\": \"52.0191005\", \"LONG\": \"8.531007\"}, {\"handle\": \"polizei_nrw_k\", \"count\": 1540, \"Name\": \"Polizei NRW K\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"K\\u00f6ln\", \"LAT\": \"50.938361\", \"LONG\": \"6.959974\"}, {\"handle\": \"bremenpolizei\", \"count\": 1417, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_kl\", \"count\": 1380, \"Name\": \"Polizei Kaiserslautern\", \"Typ\": \"Polizei\", \"Bundesland\": \"Rheinland-Pfalz\", \"Stadt\": \"Kaiserslautern\", \"LAT\": \"49.4432174\", \"LONG\": \"7.7689951\"}, {\"handle\": \"polizei_md\", \"count\": 1365, \"Name\": \"Polizei Magdeburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen-Anhalt\", \"Stadt\": \"Magdeburg\", \"LAT\": \"52.1315889\", \"LONG\": \"11.6399609\"}, {\"handle\": \"polizei_ka\", \"count\": 1356, \"Name\": \"Polizei Karlsruhe\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Karlsruhe\", \"LAT\": \"49.0068705\", \"LONG\": \"8.4034195\"}, {\"handle\": \"polizeiberlin\", \"count\": 1351, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}]}}, {\"mode\": \"vega-lite\"});\n",
|
||||
" })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-59538db49feb940cb722f8834432bfab\"}, \"mark\": \"bar\", \"encoding\": {\"x\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"handle\", \"sort\": \"-x\", \"type\": \"nominal\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-59538db49feb940cb722f8834432bfab\": [{\"handle\": \"polizei_ffm\", \"count\": 5512, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeisachsen\", \"count\": 5340, \"Name\": \"Polizei Sachsen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen\", \"Stadt\": \"Dresden\", \"LAT\": \"51.0493286\", \"LONG\": \"13.7381437\"}, {\"handle\": \"polizei_nrw_do\", \"count\": 4895, \"Name\": \"Polizei NRW DO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Dortmund\", \"LAT\": \"51.5142273\", \"LONG\": \"7.4652789\"}, {\"handle\": \"polizeibb\", \"count\": 4323, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeihamburg\", \"count\": 4042, \"Name\": \"Polizei Hamburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Hamburg\", \"Stadt\": \"Hamburg\", \"LAT\": \"53.550341\", \"LONG\": \"10.000654\"}, {\"handle\": \"polizeimuenchen\", \"count\": 3951, \"Name\": \"Polizei M\\u00fcnchen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"M\\u00fcnchen\", \"LAT\": \"48.135125\", \"LONG\": \"11.581981\"}, {\"handle\": \"polizeimfr\", \"count\": 3317, \"Name\": \"Polizei Mittelfranken\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"N\\u00fcrnberg\", \"LAT\": \"49.453872\", \"LONG\": \"11.077298\"}, {\"handle\": \"polizeimannheim\", \"count\": 3300, \"Name\": \"Polizei Mannheim\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Mannheim\", \"LAT\": 
\"49.4892913\", \"LONG\": \"8.4673098\"}, {\"handle\": \"bremenpolizei\", \"count\": 2664, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_ka\", \"count\": 2568, \"Name\": \"Polizei Karlsruhe\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Karlsruhe\", \"LAT\": \"49.0068705\", \"LONG\": \"8.4034195\"}, {\"handle\": \"polizei_nrw_k\", \"count\": 2544, \"Name\": \"Polizei NRW K\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"K\\u00f6ln\", \"LAT\": \"50.938361\", \"LONG\": \"6.959974\"}, {\"handle\": \"polizei_nrw_bo\", \"count\": 2367, \"Name\": \"Polizei NRW BO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bochum\", \"LAT\": \"51.4818111\", \"LONG\": \"7.2196635\"}, {\"handle\": \"polizei_md\", \"count\": 2319, \"Name\": \"Polizei Magdeburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen-Anhalt\", \"Stadt\": \"Magdeburg\", \"LAT\": \"52.1315889\", \"LONG\": \"11.6399609\"}, {\"handle\": \"polizei_h\", \"count\": 2302, \"Name\": \"Polizei Hannover\", \"Typ\": \"Polizei\", \"Bundesland\": \"Niedersachsen\", \"Stadt\": \"Hannover\", \"LAT\": \"52.3744779\", \"LONG\": \"9.7385532\"}, {\"handle\": \"polizei_nrw_bi\", \"count\": 2299, \"Name\": \"Polizei NRW BI\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bielefeld\", \"LAT\": \"52.0191005\", \"LONG\": \"8.531007\"}]}}, {\"mode\": \"vega-lite\"});\n",
|
||||
"</script>"
|
||||
],
|
||||
"text/plain": [
|
||||
"alt.Chart(...)"
|
||||
]
|
||||
},
|
||||
"execution_count": 47,
|
||||
"execution_count": 123,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -665,7 +812,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 90,
|
||||
"execution_count": 125,
|
||||
"id": "d0549250-b11f-4762-8500-1134c53303b4",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
|
@ -674,32 +821,29 @@
|
|||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"0 Die Gewalt, die unsere Kolleginnen & Kollegen in der Silvesternacht erleben mussten, ist une...\n",
|
||||
"1 WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...\n",
|
||||
"2 Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...\n",
|
||||
"3 Auf unserem #A45 in #lichterfelde) befindet sich gerade diese Fundhündin. Sie wurde am Hindenbur...\n",
|
||||
"4 @nexta_tv Wir haben das Video gesichert und leiten den Sachverhalt an die zuständigen Kolleginne...\n",
|
||||
" ... \n",
|
||||
"84789 #Polizeimeldungen #Tagesticker\\n \\nAnhalt-Bitterfeld\\nhttps://t.co/tNLEzztL1o\\n \\nDessau-Roßlau\\...\n",
|
||||
"84790 Am Mittwoch erhielten wir mehrere Anrufe über einen auffälligen Pkw-Fahrer (Reifen quietschen un...\n",
|
||||
"84791 @Jonas5Luisa Kleiner Pro-Tipp von uns: Einfach mal auf den link klicken! ;)*cl\n",
|
||||
"84792 Vermisstensuche nach 27-Jährigem aus Bendorf-Mühlhofen: Wer hat Tobias Wißmann gesehen? Ein Foto...\n",
|
||||
"84793 #PolizeiNRW #Köln #Leverkusen : XXX - Infos unter https://t.co/SeWShP2tZE https://t.co/Kopy7w8W3B\n",
|
||||
"Name: tweet_text, Length: 84794, dtype: object"
|
||||
"0 Die Gewalt, die unsere Kolleginnen & Kollegen in der Silvesternacht erleben mussten, ist une...\n",
|
||||
"1 An diejenigen, die vergangene Nacht in eine Schule in #Gesundbrunnen eingebrochen sind und 242 T...\n",
|
||||
"2 WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...\n",
|
||||
"3 Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...\n",
|
||||
"4 Weil wir dich schieben! @BVG_Kampagne 😉 https://t.co/N8kdlCxhz2\n",
|
||||
" ... \n",
|
||||
"151685 Sinken die Temperaturen ❄, steigt zeitgleich das Risiko für Verkehrsteilnehmer. Höchste Zeit zu ...\n",
|
||||
"151686 📺Am Sonntag, um 19:50 Uhr, geht es bei #KripoLive im \\n@mdrde\\n auch um die Fahndung nach einem ...\n",
|
||||
"151687 Musik verbindet!\\nUnser #Adventskalender der #Bundespolizei startet morgen ➡ https://t.co/V6CaTV...\n",
|
||||
"151688 @gretchen_hann Hallo, diese Frage kann die Bundespolizei Spezialkräfte besser beantworten. Richt...\n",
|
||||
"151689 #Bönen #Holzwickede - Verstöße gegen Coronaschutzverordnung: Polizei löst Gaststättenabend und F...\n",
|
||||
"Name: tweet_text, Length: 151690, dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 90,
|
||||
"execution_count": 125,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tweets_attention = tweets_combined.merge(tweets_user,\n",
|
||||
" on = \"user_id\",\n",
|
||||
" how = \"left\"\n",
|
||||
" ).merge(police_stations,\n",
|
||||
" on = \"handle\",\n",
|
||||
" how = \"left\")\n",
|
||||
"tweets_attention = tweets_combined.merge(police_stations,\n",
|
||||
" on = \"handle\",\n",
|
||||
" how = \"left\")\n",
|
||||
"pd.options.display.max_colwidth = 100\n",
|
||||
"tweets_attention.sort_values('like_count', ascending = False).reset_index()['tweet_text']\n",
|
||||
"\n"
|
||||
|
@ -841,7 +985,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 148,
|
||||
"execution_count": 121,
|
||||
"id": "ed86b45e-9dd8-436d-9c96-15500ed93985",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
|
@ -868,142 +1012,104 @@
|
|||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>like_count</th>\n",
|
||||
" <th>retweet_count</th>\n",
|
||||
" <th>reply_count</th>\n",
|
||||
" <th>quote_count</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th>count</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>user_id</th>\n",
|
||||
" <th>user_name</th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>223758384</th>\n",
|
||||
" <th>Polizei Sachsen</th>\n",
|
||||
" <td>5340</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>259607457</th>\n",
|
||||
" <th>Polizei NRW K</th>\n",
|
||||
" <td>2544</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>19</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>424895827</th>\n",
|
||||
" <th>Polizei Stuttgart</th>\n",
|
||||
" <td>1913</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>769128278</th>\n",
|
||||
" <th>Polizei NRW DO</th>\n",
|
||||
" <td>4895</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>775664780</th>\n",
|
||||
" <th>Polizei Rostock</th>\n",
|
||||
" <td>604</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>151685</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>1169206134189830145</th>\n",
|
||||
" <th>Polizei Stendal</th>\n",
|
||||
" <td>842</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>151686</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>1184022676488314880</th>\n",
|
||||
" <th>Polizei Pforzheim</th>\n",
|
||||
" <td>283</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>151687</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>1184024283342950401</th>\n",
|
||||
" <th>Polizei Ravensburg</th>\n",
|
||||
" <td>460</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>151688</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>1232548941889228808</th>\n",
|
||||
" <th>Systemstratege:</th>\n",
|
||||
" <td>168</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>151689</th>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <th>1295978598034284546</th>\n",
|
||||
" <th>Polizei ZPD NI</th>\n",
|
||||
" <td>133</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>151690 rows × 4 columns</p>\n",
|
||||
"<p>163 rows × 1 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" like_count retweet_count reply_count quote_count\n",
|
||||
"0 2 1 2 0\n",
|
||||
"1 2 0 0 0\n",
|
||||
"2 19 3 3 0\n",
|
||||
"3 0 0 0 0\n",
|
||||
"4 2 0 0 0\n",
|
||||
"... ... ... ... ...\n",
|
||||
"151685 5 1 1 0\n",
|
||||
"151686 2 0 0 0\n",
|
||||
"151687 6 0 0 0\n",
|
||||
"151688 2 0 0 0\n",
|
||||
"151689 10 1 0 0\n",
|
||||
" count\n",
|
||||
"user_id user_name \n",
|
||||
"223758384 Polizei Sachsen 5340\n",
|
||||
"259607457 Polizei NRW K 2544\n",
|
||||
"424895827 Polizei Stuttgart 1913\n",
|
||||
"769128278 Polizei NRW DO 4895\n",
|
||||
"775664780 Polizei Rostock 604\n",
|
||||
"... ...\n",
|
||||
"1169206134189830145 Polizei Stendal 842\n",
|
||||
"1184022676488314880 Polizei Pforzheim 283\n",
|
||||
"1184024283342950401 Polizei Ravensburg 460\n",
|
||||
"1232548941889228808 Systemstratege: 168\n",
|
||||
"1295978598034284546 Polizei ZPD NI 133\n",
|
||||
"\n",
|
||||
"[151690 rows x 4 columns]"
|
||||
"[163 rows x 1 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 148,
|
||||
"execution_count": 121,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 142,
|
||||
"id": "dac4e5fc-22ca-466d-bc3c-586e68696d03",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"like_count\n",
|
||||
"False 147573\n",
|
||||
"True 4117\n",
|
||||
"dtype: int64"
|
||||
]
|
||||
},
|
||||
"execution_count": 142,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": []
|
||||
"source": [
|
||||
"tweets_combined.groupby(by = [\"user_id\", \"user_name\"]\n",
|
||||
" )[\"user_id\"].aggregate(['count']\n",
|
||||
" )"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
Loading…
Reference in a new issue