copbird_aufarbeitung/.ipynb_checkpoints/zusammenfassung-checkpoint.ipynb

1031 lines
38 KiB
Text
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
"id": "83885e86-1ccb-46ec-bee9-a33f3b541569",
"metadata": {},
"source": [
"# Zusammenfassung der Analysen vom Hackathon für die Website\n",
"\n",
"- womöglich zur Darstellung auf der Website\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "9bd1686f-9bbc-4c05-a5f5-e0c4ce653fb2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import altair as alt"
]
},
{
"cell_type": "markdown",
"id": "81780c9a-7721-438b-9726-ff5a70910ce8",
"metadata": {},
"source": [
"## Datenaufbereitung\n",
"\n",
"Dump der Datenbank vom 25.03.2023. Die verschiedenen Tabellen der Datenbank werden einzeln eingelesen. Zusätzlich werden alle direkt zu einem Tweet gehörenden Informationen in einem Datenobjekt gesammelt. Die GIS-Daten zu den einzelnen Polizeistationen (\"police_stations\") sind noch unvollständig und müssen gegebenenfalls noch einmal überprüft werden.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 119,
"id": "fcc48831-7999-4d79-b722-736715b1ced6",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"((479991, 3), (151690, 8), (151690, 4), (13327, 5))"
]
},
"execution_count": 119,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Tweet dump of the old scraper: parsed once here and reused for both the\n",
"# text table and the statistics table (the original cell read this file twice).\n",
"tweets_old = pd.read_csv(\"data/tweet_old.tsv\", sep = \"\\t\")\n",
"\n",
"# Entity table: rows from the old scraper first, then rows from the new scraper.\n",
"tweets_meta = pd.concat([pd.read_csv(\"data/entity_old.tsv\", sep = \"\\t\"),\n",
"                         pd.read_csv(\"data/tweets.csv\")])\n",
"\n",
"# Tweet text together with creation time and author id.\n",
"tweets_text = pd.concat([tweets_old[['id', 'tweet_text', 'created_at', 'user_id']\n",
"                                   ].rename(columns = {\"id\": \"tweet_id\"}),\n",
"                         pd.read_csv(\"data/tweets-1679742698645.csv\")])\n",
"\n",
"# Like / retweet / reply / quote counters per tweet.\n",
"tweets_statistics = pd.concat([tweets_old[['id', 'like_count', 'retweet_count', 'reply_count', 'quote_count']\n",
"                                         ].rename(columns = {\"id\": \"tweet_id\"}),\n",
"                               pd.read_csv(\"data/tweets-1679742620302.csv\")])\n",
"\n",
"# User table: outer join of both user files on user_id, so users that appear in\n",
"# only one of them are kept. Overlapping columns from the old file get the\n",
"# suffix _2021, those from the new file the suffix _2022.\n",
"users_old = pd.read_csv(\"data/user_old.tsv\", sep = \"\\t\"\n",
"                       ).rename(columns = {\"id\": \"user_id\", \"name\": \"user_name\"})\n",
"users_new = pd.read_csv(\"data/tweets-1679742702794.csv\"\n",
"                       ).rename(columns = {\"username\": \"handle\", \"handle\": \"user_name\"})\n",
"tweets_user = users_old.merge(users_new,\n",
"                              on = \"user_id\",\n",
"                              how = \"outer\",\n",
"                              suffixes = (\"_2021\", \"_2022\"))\n",
"\n",
"tweets_meta.shape, tweets_statistics.shape, tweets_text.shape, tweets_user.shape"
]
},
{
"cell_type": "markdown",
"id": "0f7b2b95-0a6c-42c6-a308-5f68d4ba94b9",
"metadata": {},
"source": [
"Jetzt können noch alle Tweet-bezogenen Informationen in einem DataFrame gespeichert werden:"
]
},
{
"cell_type": "code",
"execution_count": 150,
"id": "cf409591-74a0-48dc-8f9e-66f7229f58cd",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"tweet_id int64\n",
"like_count int64\n",
"retweet_count int64\n",
"reply_count int64\n",
"quote_count int64\n",
"measured_at object\n",
"is_deleted float64\n",
"tweet_text object\n",
"created_at object\n",
"user_id int64\n",
"user_name_2021 object\n",
"handle_2021 object\n",
"handle_2022 object\n",
"user_name_2022 object\n",
"dtype: object"
]
},
"execution_count": 150,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"count_columns = ['like_count', 'retweet_count', 'reply_count', 'quote_count']\n",
"\n",
"# Join statistics, text and user information into one frame\n",
"tweets_combined = (\n",
"    tweets_statistics\n",
"    .merge(tweets_text, on = 'tweet_id')\n",
"    .merge(tweets_user, on = 'user_id')\n",
"    .drop(columns = ['id'])  # drop the unnecessary id column (redundant to the index)\n",
")\n",
"\n",
"# Store the counters as integers; missing counters are encoded as -99\n",
"tweets_combined[count_columns] = tweets_combined[count_columns].fillna(-99).astype(int)\n",
"tweets_combined.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "e312a975-3921-44ee-a7c5-37736678bc3f",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>handle</th>\n",
" <th>username</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1000004686156652545</td>\n",
" <td>6jannik9</td>\n",
" <td>Systemstratege:</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1000043230870867969</td>\n",
" <td>lsollik</td>\n",
" <td>Physiolucy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1000405847460151296</td>\n",
" <td>achim1949hans</td>\n",
" <td>Systemstratege:</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1000460805719121921</td>\n",
" <td>wahrew</td>\n",
" <td>WahreWorte</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1000744009638252544</td>\n",
" <td>derd1ck3</td>\n",
" <td>Ⓓ①ⓒⓚ①③ (🏡)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11554</th>\n",
" <td>99931264</td>\n",
" <td>havok1975</td>\n",
" <td>Systemstratege:</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11555</th>\n",
" <td>999542638226403328</td>\n",
" <td>madame_de_saxe</td>\n",
" <td>Systemstratege:</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11556</th>\n",
" <td>999901133282754560</td>\n",
" <td>tungstendie74</td>\n",
" <td>Systemstratege:</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11557</th>\n",
" <td>999904275080794112</td>\n",
" <td>_danielheim</td>\n",
" <td>Systemstratege:</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11558</th>\n",
" <td>999955376454930432</td>\n",
" <td>amyman6010</td>\n",
" <td>Systemstratege:</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>11559 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" user_id handle username\n",
"0 1000004686156652545 6jannik9 Systemstratege: \n",
"1 1000043230870867969 lsollik Physiolucy\n",
"2 1000405847460151296 achim1949hans Systemstratege: \n",
"3 1000460805719121921 wahrew WahreWorte\n",
"4 1000744009638252544 derd1ck3 Ⓓ①ⓒⓚ①③ (🏡)\n",
"... ... ... ...\n",
"11554 99931264 havok1975 Systemstratege: \n",
"11555 999542638226403328 madame_de_saxe Systemstratege: \n",
"11556 999901133282754560 tungstendie74 Systemstratege: \n",
"11557 999904275080794112 _danielheim Systemstratege: \n",
"11558 999955376454930432 amyman6010 Systemstratege: \n",
"\n",
"[11559 rows x 3 columns]"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tweets_meta = pd.read_csv(\"data/tweets.csv\")\n",
"tweets_time = pd.read_csv(\"data/tweets-1679742620302.csv\")\n",
"tweets_text = pd.read_csv(\"data/tweets-1679742698645.csv\")\n",
"\n",
"# User table: swap the names of the `username` and `handle` columns and\n",
"# convert the handles to lower case\n",
"tweets_user = (\n",
"    pd.read_csv(\"data/tweets-1679742702794.csv\")\n",
"    .rename(columns = {\"username\": \"handle\", \"handle\": \"username\"})\n",
"    .assign(handle = lambda users: users['handle'].str.lower())\n",
")\n",
"\n",
"# Merge the two tweet-related frames, drop the unnecessary id column\n",
"# (redundant to the index) and parse both timestamp columns as datetimes\n",
"tweets_combined = (\n",
"    tweets_time\n",
"    .merge(tweets_text, how = 'inner', on = 'tweet_id')\n",
"    .drop(columns = ['id'])\n",
"    .assign(measured_at = lambda tweets: pd.to_datetime(tweets['measured_at']),\n",
"            created_at = lambda tweets: pd.to_datetime(tweets['created_at']))\n",
")\n",
"\n",
"# Additional information on the police stations, keyed by account handle\n",
"police_stations = (\n",
"    pd.read_csv(\"data/polizei_accounts_geo.csv\", sep = \"\\t\")\n",
"    .rename(columns = {\"Polizei Account\": \"handle\"})\n",
")\n",
"tweets_user"
]
},
{
"cell_type": "markdown",
"id": "91dfb8bb-15dc-4b2c-9c5f-3eab18d78ef8",
"metadata": {
"tags": []
},
"source": [
"### Adjazenzmatrix mentions\n",
" \n",
"Eine Information ist nicht direkt in den Daten enthalten: welche Accounts in einem Tweet erwähnt werden. Erwähnungen sind nur im Tweet-Text durch @handle gekennzeichnet."
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "5d8bf730-3c8f-4143-b405-c95f1914f54b",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"0 Auch wir schließen uns dem Apell an! \\n\\n#Ukra...\n",
"1 @BWeltenbummler Sehr schwer zu sagen. Die Evak...\n",
"2 Halten Sie durch die Evakuierung ist fast ab...\n",
"3 Halten Sie durch die Evakuierung ist fast ab...\n",
"4 RT @drkberlin_iuk: 🚨 In enger Abstimmung mit d...\n",
"Name: tweet_text, dtype: object"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# TODO: extract the @handle mentions from tweet_text and build the mention adjacency matrix"
]
},
{
"cell_type": "markdown",
"id": "0c242090-0748-488c-b604-f521030f468f",
"metadata": {
"tags": []
},
"source": [
"## Metadaten \n",
"\n",
"Welche Daten bilden die Grundlage?"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "0e5eb455-6b12-4572-8f5e-f328a94bd797",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"hashtag 157145\n",
"url 88322\n",
"mention 36815\n",
"Name: entity_type, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows per entity type\n",
"entity_type_counts = tweets_meta[\"entity_type\"].value_counts()\n",
"# To inspect only the mentions: tweets_meta[tweets_meta['entity_type'] == \"mention\"]\n",
"entity_type_counts"
]
},
{
"cell_type": "markdown",
"id": "ef440301-cf89-4e80-8801-eb853d636190",
"metadata": {
"tags": []
},
"source": [
"Insgesamt haben wir 84794 einzigartige Tweets:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "5a438e7f-8735-40bb-b450-2ce168f0f67a",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"84794"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tweets_combined[\"tweet_id\"].nunique()  # number of distinct tweet ids"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4f1e8c6c-3610-436e-899e-4d0307259230",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Die Tweets wurden vom 2022-02-24 bis zum: 2023-03-16 gesammelt. Also genau insgesamt: 384 Tage.\n"
]
}
],
"source": [
"# Collection period: first and last tweet timestamp, computed once and reused below\n",
"first_tweet = tweets_combined['created_at'].min()\n",
"last_tweet = tweets_combined['created_at'].max()\n",
"print(\"Die Tweets wurden vom \", first_tweet.date(), \"bis zum:\", last_tweet.date(), \"gesammelt.\", \"Also genau insgesamt:\", (last_tweet - first_tweet).days, \"Tage.\")\n",
"# To inspect the tweets with the latest timestamp: tweets_combined[tweets_combined['created_at'] == last_tweet]"
]
},
{
"cell_type": "markdown",
"id": "d8b47a60-1535-4d03-913a-73e897bc18df",
"metadata": {
"tags": []
},
"source": [
"Welche Polizei-Accounts haben am meisten getweetet?"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "9373552e-6baf-46df-ae16-c63603e20a83",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>handle</th>\n",
" <th>count</th>\n",
" <th>Name</th>\n",
" <th>Typ</th>\n",
" <th>Bundesland</th>\n",
" <th>Stadt</th>\n",
" <th>LAT</th>\n",
" <th>LONG</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>polizei_ffm</td>\n",
" <td>2993</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>polizei_nrw_do</td>\n",
" <td>2860</td>\n",
" <td>Polizei NRW DO</td>\n",
" <td>Polizei</td>\n",
" <td>Nordrhein-Westfalen</td>\n",
" <td>Dortmund</td>\n",
" <td>51.5142273</td>\n",
" <td>7.4652789</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>polizeisachsen</td>\n",
" <td>2700</td>\n",
" <td>Polizei Sachsen</td>\n",
" <td>Polizei</td>\n",
" <td>Sachsen</td>\n",
" <td>Dresden</td>\n",
" <td>51.0493286</td>\n",
" <td>13.7381437</td>\n",
" </tr>\n",
" <tr>\n",
" <th>91</th>\n",
" <td>polizeibb</td>\n",
" <td>2310</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>polizeihamburg</td>\n",
" <td>2093</td>\n",
" <td>Polizei Hamburg</td>\n",
" <td>Polizei</td>\n",
" <td>Hamburg</td>\n",
" <td>Hamburg</td>\n",
" <td>53.550341</td>\n",
" <td>10.000654</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" handle count Name Typ Bundesland \\\n",
"11 polizei_ffm 2993 NaN NaN NaN \n",
"3 polizei_nrw_do 2860 Polizei NRW DO Polizei Nordrhein-Westfalen \n",
"0 polizeisachsen 2700 Polizei Sachsen Polizei Sachsen \n",
"91 polizeibb 2310 NaN NaN NaN \n",
"61 polizeihamburg 2093 Polizei Hamburg Polizei Hamburg \n",
"\n",
" Stadt LAT LONG \n",
"11 NaN NaN NaN \n",
"3 Dortmund 51.5142273 7.4652789 \n",
"0 Dresden 51.0493286 13.7381437 \n",
"91 NaN NaN NaN \n",
"61 Hamburg 53.550341 10.000654 "
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count the tweets of every account\n",
"tweets_per_account = (\n",
"    tweets_combined\n",
"    .merge(tweets_user, on = \"user_id\")\n",
"    .groupby([\"user_id\", \"handle\", \"username\"])[\"user_id\"]\n",
"    .agg(['count'])\n",
")\n",
"\n",
"# Attach the police station information and order by number of tweets, highest first\n",
"tweets_agg = (\n",
"    tweets_per_account\n",
"    .merge(police_stations, on = \"handle\", how = \"left\")\n",
"    .sort_values('count', ascending = False)\n",
")\n",
"\n",
"activy_police_vis = tweets_agg.head(50)  # the 50 accounts with the most tweets\n",
"activy_police_vis.head()"
]
},
{
"cell_type": "markdown",
"id": "9cf5f544-706b-41af-b785-7023f04e3ecb",
"metadata": {
"tags": []
},
"source": [
"Visualisierung der aktivsten Polizeistationen:"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "b1c39196-d1cc-4f82-8e01-7529e7b3046f",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"<div id=\"altair-viz-a660bd38b72240eaae654b5e471932a6\"></div>\n",
"<script type=\"text/javascript\">\n",
" var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
" (function(spec, embedOpt){\n",
" let outputDiv = document.currentScript.previousElementSibling;\n",
" if (outputDiv.id !== \"altair-viz-a660bd38b72240eaae654b5e471932a6\") {\n",
" outputDiv = document.getElementById(\"altair-viz-a660bd38b72240eaae654b5e471932a6\");\n",
" }\n",
" const paths = {\n",
" \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
" \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
" \"vega-lite\": \"https://cdn.jsdelivr.net/npm//vega-lite@4.17.0?noext\",\n",
" \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
" };\n",
"\n",
" function maybeLoadScript(lib, version) {\n",
" var key = `${lib.replace(\"-\", \"\")}_version`;\n",
" return (VEGA_DEBUG[key] == version) ?\n",
" Promise.resolve(paths[lib]) :\n",
" new Promise(function(resolve, reject) {\n",
" var s = document.createElement('script');\n",
" document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
" s.async = true;\n",
" s.onload = () => {\n",
" VEGA_DEBUG[key] = version;\n",
" return resolve(paths[lib]);\n",
" };\n",
" s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
" s.src = paths[lib];\n",
" });\n",
" }\n",
"\n",
" function showError(err) {\n",
" outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
" throw err;\n",
" }\n",
"\n",
" function displayChart(vegaEmbed) {\n",
" vegaEmbed(outputDiv, spec, embedOpt)\n",
" .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
" }\n",
"\n",
" if(typeof define === \"function\" && define.amd) {\n",
" requirejs.config({paths});\n",
" require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
" } else {\n",
" maybeLoadScript(\"vega\", \"5\")\n",
" .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
" .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
" .catch(showError)\n",
" .then(() => displayChart(vegaEmbed));\n",
" }\n",
" })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-da2bacd5b3a57271f77be4dc435a345f\"}, \"mark\": \"bar\", \"encoding\": {\"x\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"handle\", \"sort\": \"-x\", \"type\": \"nominal\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-da2bacd5b3a57271f77be4dc435a345f\": [{\"handle\": \"polizei_ffm\", \"count\": 2993, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_nrw_do\", \"count\": 2860, \"Name\": \"Polizei NRW DO\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Dortmund\", \"LAT\": \"51.5142273\", \"LONG\": \"7.4652789\"}, {\"handle\": \"polizeisachsen\", \"count\": 2700, \"Name\": \"Polizei Sachsen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen\", \"Stadt\": \"Dresden\", \"LAT\": \"51.0493286\", \"LONG\": \"13.7381437\"}, {\"handle\": \"polizeibb\", \"count\": 2310, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizeihamburg\", \"count\": 2093, \"Name\": \"Polizei Hamburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Hamburg\", \"Stadt\": \"Hamburg\", \"LAT\": \"53.550341\", \"LONG\": \"10.000654\"}, {\"handle\": \"polizeimuenchen\", \"count\": 2021, \"Name\": \"Polizei M\\u00fcnchen\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"M\\u00fcnchen\", \"LAT\": \"48.135125\", \"LONG\": \"11.581981\"}, {\"handle\": \"polizeimfr\", \"count\": 1892, \"Name\": \"Polizei Mittelfranken\", \"Typ\": \"Polizei\", \"Bundesland\": \"Bayern\", \"Stadt\": \"N\\u00fcrnberg\", \"LAT\": \"49.453872\", \"LONG\": \"11.077298\"}, {\"handle\": \"polizeimannheim\", \"count\": 1835, \"Name\": \"Polizei Mannheim\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Mannheim\", \"LAT\": 
\"49.4892913\", \"LONG\": \"8.4673098\"}, {\"handle\": \"polizei_nrw_bi\", \"count\": 1794, \"Name\": \"Polizei NRW BI\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"Bielefeld\", \"LAT\": \"52.0191005\", \"LONG\": \"8.531007\"}, {\"handle\": \"polizei_nrw_k\", \"count\": 1540, \"Name\": \"Polizei NRW K\", \"Typ\": \"Polizei\", \"Bundesland\": \"Nordrhein-Westfalen\", \"Stadt\": \"K\\u00f6ln\", \"LAT\": \"50.938361\", \"LONG\": \"6.959974\"}, {\"handle\": \"bremenpolizei\", \"count\": 1417, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}, {\"handle\": \"polizei_kl\", \"count\": 1380, \"Name\": \"Polizei Kaiserslautern\", \"Typ\": \"Polizei\", \"Bundesland\": \"Rheinland-Pfalz\", \"Stadt\": \"Kaiserslautern\", \"LAT\": \"49.4432174\", \"LONG\": \"7.7689951\"}, {\"handle\": \"polizei_md\", \"count\": 1365, \"Name\": \"Polizei Magdeburg\", \"Typ\": \"Polizei\", \"Bundesland\": \"Sachsen-Anhalt\", \"Stadt\": \"Magdeburg\", \"LAT\": \"52.1315889\", \"LONG\": \"11.6399609\"}, {\"handle\": \"polizei_ka\", \"count\": 1356, \"Name\": \"Polizei Karlsruhe\", \"Typ\": \"Polizei\", \"Bundesland\": \"Baden-W\\u00fcrttemberg\", \"Stadt\": \"Karlsruhe\", \"LAT\": \"49.0068705\", \"LONG\": \"8.4034195\"}, {\"handle\": \"polizeiberlin\", \"count\": 1351, \"Name\": null, \"Typ\": null, \"Bundesland\": null, \"Stadt\": null, \"LAT\": null, \"LONG\": null}]}}, {\"mode\": \"vega-lite\"});\n",
"</script>"
],
"text/plain": [
"alt.Chart(...)"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Bar chart of the 15 accounts with the most tweets, sorted by descending count\n",
"top_accounts = activy_police_vis.head(15)\n",
"barchart = (\n",
"    alt.Chart(top_accounts)\n",
"    .mark_bar()\n",
"    .encode(\n",
"        x = alt.X('count:Q'),\n",
"        y = alt.Y('handle:N', sort = '-x'),\n",
"    )\n",
")\n",
"barchart"
]
},
{
"cell_type": "markdown",
"id": "90f686ff-93c6-44d9-9761-feb35dfe9d1d",
"metadata": {
"tags": []
},
"source": [
"Welche Tweets ziehen besonders viel Aufmerksamkeit auf sich?"
]
},
{
"cell_type": "code",
"execution_count": 90,
"id": "d0549250-b11f-4762-8500-1134c53303b4",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"0 Die Gewalt, die unsere Kolleginnen &amp; Kollegen in der Silvesternacht erleben mussten, ist une...\n",
"1 WICHTIGE Info:\\nÜber das Internet wird derzeit ein Video verbreitet, in dem von einem Überfall a...\n",
"2 Die Experten gehen derzeit davon aus, dass es sich um ein absichtliches \"Fake-Video\" handelt, da...\n",
"3 Auf unserem #A45 in #lichterfelde) befindet sich gerade diese Fundhündin. Sie wurde am Hindenbur...\n",
"4 @nexta_tv Wir haben das Video gesichert und leiten den Sachverhalt an die zuständigen Kolleginne...\n",
" ... \n",
"84789 #Polizeimeldungen #Tagesticker\\n \\nAnhalt-Bitterfeld\\nhttps://t.co/tNLEzztL1o\\n \\nDessau-Roßlau\\...\n",
"84790 Am Mittwoch erhielten wir mehrere Anrufe über einen auffälligen Pkw-Fahrer (Reifen quietschen un...\n",
"84791 @Jonas5Luisa Kleiner Pro-Tipp von uns: Einfach mal auf den link klicken! ;)*cl\n",
"84792 Vermisstensuche nach 27-Jährigem aus Bendorf-Mühlhofen: Wer hat Tobias Wißmann gesehen? Ein Foto...\n",
"84793 #PolizeiNRW #Köln #Leverkusen : XXX - Infos unter https://t.co/SeWShP2tZE https://t.co/Kopy7w8W3B\n",
"Name: tweet_text, Length: 84794, dtype: object"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show up to 100 characters per column value so the tweet texts stay readable\n",
"pd.set_option(\"display.max_colwidth\", 100)\n",
"\n",
"# Attach user and police station information to every tweet\n",
"tweets_with_user = tweets_combined.merge(tweets_user, on = \"user_id\", how = \"left\")\n",
"tweets_attention = tweets_with_user.merge(police_stations, on = \"handle\", how = \"left\")\n",
"\n",
"# Tweet texts ordered from most to fewest likes\n",
"tweets_attention.sort_values('like_count', ascending = False)['tweet_text'].reset_index(drop = True)"
]
},
{
"cell_type": "code",
"execution_count": 90,
"id": "97952234-7957-421e-bd2c-2c8261992c5a",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>handle</th>\n",
" <th>user_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1000004686156652545</td>\n",
" <td>6jannik9</td>\n",
" <td>Systemstratege:</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1000043230870867969</td>\n",
" <td>LSollik</td>\n",
" <td>Physiolucy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1000405847460151296</td>\n",
" <td>Achim1949Hans</td>\n",
" <td>Systemstratege:</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1000460805719121921</td>\n",
" <td>WahreW</td>\n",
" <td>WahreWorte</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1000744009638252544</td>\n",
" <td>derD1ck3</td>\n",
" <td>Ⓓ①ⓒⓚ①③ (🏡)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11554</th>\n",
" <td>99931264</td>\n",
" <td>Havok1975</td>\n",
" <td>Systemstratege:</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11555</th>\n",
" <td>999542638226403328</td>\n",
" <td>Madame_de_Saxe</td>\n",
" <td>Systemstratege:</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11556</th>\n",
" <td>999901133282754560</td>\n",
" <td>tungstendie74</td>\n",
" <td>Systemstratege:</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11557</th>\n",
" <td>999904275080794112</td>\n",
" <td>_danielheim</td>\n",
" <td>Systemstratege:</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11558</th>\n",
" <td>999955376454930432</td>\n",
" <td>amyman6010</td>\n",
" <td>Systemstratege:</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>11559 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" user_id handle user_name\n",
"0 1000004686156652545 6jannik9 Systemstratege: \n",
"1 1000043230870867969 LSollik Physiolucy\n",
"2 1000405847460151296 Achim1949Hans Systemstratege: \n",
"3 1000460805719121921 WahreW WahreWorte\n",
"4 1000744009638252544 derD1ck3 Ⓓ①ⓒⓚ①③ (🏡)\n",
"... ... ... ...\n",
"11554 99931264 Havok1975 Systemstratege: \n",
"11555 999542638226403328 Madame_de_Saxe Systemstratege: \n",
"11556 999901133282754560 tungstendie74 Systemstratege: \n",
"11557 999904275080794112 _danielheim Systemstratege: \n",
"11558 999955376454930432 amyman6010 Systemstratege: \n",
"\n",
"[11559 rows x 3 columns]"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Older user table (user_old.tsv) with id/name renamed to user_id/user_name\n",
"old = (\n",
"    pd.read_csv(\"data/user_old.tsv\", sep = \"\\t\")\n",
"    .rename(columns = {\"id\": \"user_id\", \"name\": \"user_name\"})\n",
")\n",
"\n",
"# Newer user table; its `username` column becomes `handle` and its `handle` column becomes `user_name`\n",
"new = (\n",
"    pd.read_csv(\"data/tweets-1679742702794.csv\")\n",
"    .rename(columns = {\"username\": \"handle\", \"handle\": \"user_name\"})\n",
")\n",
"new"
]
},
{
"cell_type": "code",
"execution_count": 148,
"id": "ed86b45e-9dd8-436d-9c96-15500ed93985",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>like_count</th>\n",
" <th>retweet_count</th>\n",
" <th>reply_count</th>\n",
" <th>quote_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>151685</th>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>151686</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>151687</th>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>151688</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>151689</th>\n",
" <td>10</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>151690 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" like_count retweet_count reply_count quote_count\n",
"0 2 1 2 0\n",
"1 2 0 0 0\n",
"2 19 3 3 0\n",
"3 0 0 0 0\n",
"4 2 0 0 0\n",
"... ... ... ... ...\n",
"151685 5 1 1 0\n",
"151686 2 0 0 0\n",
"151687 6 0 0 0\n",
"151688 2 0 0 0\n",
"151689 10 1 0 0\n",
"\n",
"[151690 rows x 4 columns]"
]
},
"execution_count": 148,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 142,
"id": "dac4e5fc-22ca-466d-bc3c-586e68696d03",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"like_count\n",
"False 147573\n",
"True 4117\n",
"dtype: int64"
]
},
"execution_count": 142,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "python-scientific kernel",
"language": "python",
"name": "python-scientific"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}