1444 lines
47 KiB
Text
1444 lines
47 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "5eecbeeb",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"from tqdm import tqdm # Fortschrittsanzeige für pandas\n",
|
|
"tqdm.pandas()\n",
|
|
"tweet_csv = '../data/copbird_table_tweet.csv'\n",
|
|
"entity_csv = '../data/copbird_table_entity.csv'\n",
|
|
"user_csv = '../data/copbird_table_user.csv'\n",
|
|
"geo_csv = '../geolocations/polizei_accounts_geo.csv'\n",
|
|
"\n",
|
|
"limit = None\n",
|
|
"tweets = pd.read_csv(tweet_csv, nrows=limit)\n",
|
|
"entities = pd.read_csv(entity_csv, nrows=limit)\n",
|
|
"users = pd.read_csv(user_csv, nrows=limit)\n",
|
|
"geo = pd.read_csv(geo_csv, nrows=limit, sep=\"\\t\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "2fbcec52",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#tweets[\"contains\"] = tweets[\"tweet_text\"].str.contains(r'(([@])\\w)')\n",
|
|
"#tweets_subset = tweets[tweets[\"contains\"]]\n",
|
|
"#tweets_subset[\"mention\"] = tweets_subset[\"tweet_text\"].str.extract(r'(([@])\\w)')\n",
|
|
"mentions = entities[entities[\"entity_type\"]==\"mention\"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "3e1d2876",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>tweet_id</th>\n",
|
|
" <th>tag</th>\n",
|
|
" <th>entity_type</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1321021123463663616</td>\n",
|
|
" <td>mahanna196</td>\n",
|
|
" <td>mention</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1321025127388188673</td>\n",
|
|
" <td>bka</td>\n",
|
|
" <td>mention</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>1321028108665950208</td>\n",
|
|
" <td>StrupeitVolker</td>\n",
|
|
" <td>mention</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>1321029199998656513</td>\n",
|
|
" <td>bka</td>\n",
|
|
" <td>mention</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>1321032307277443072</td>\n",
|
|
" <td>Sitewinder</td>\n",
|
|
" <td>mention</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>1321033945467834368</td>\n",
|
|
" <td>GunstickULM</td>\n",
|
|
" <td>mention</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>1321034866377543680</td>\n",
|
|
" <td>Korbi161224</td>\n",
|
|
" <td>mention</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>1321035908146233344</td>\n",
|
|
" <td>GunstickULM</td>\n",
|
|
" <td>mention</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>1321036834579894272</td>\n",
|
|
" <td>retikulum383</td>\n",
|
|
" <td>mention</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>1321036834579894272</td>\n",
|
|
" <td>FPolitiker</td>\n",
|
|
" <td>mention</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" tweet_id tag entity_type\n",
|
|
"0 1321021123463663616 mahanna196 mention\n",
|
|
"1 1321025127388188673 bka mention\n",
|
|
"2 1321028108665950208 StrupeitVolker mention\n",
|
|
"3 1321029199998656513 bka mention\n",
|
|
"4 1321032307277443072 Sitewinder mention\n",
|
|
"5 1321033945467834368 GunstickULM mention\n",
|
|
"6 1321034866377543680 Korbi161224 mention\n",
|
|
"7 1321035908146233344 GunstickULM mention\n",
|
|
"8 1321036834579894272 retikulum383 mention\n",
|
|
"9 1321036834579894272 FPolitiker mention"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"mentions.head(10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "944f2535",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>id</th>\n",
|
|
" <th>user_id</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1321021123463663616</td>\n",
|
|
" <td>778895426007203840</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1321023114071969792</td>\n",
|
|
" <td>2397974054</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>1321025127388188673</td>\n",
|
|
" <td>2397974054</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>1321028108665950208</td>\n",
|
|
" <td>2810902381</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>1321029199998656513</td>\n",
|
|
" <td>223758384</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" id user_id\n",
|
|
"0 1321021123463663616 778895426007203840\n",
|
|
"1 1321023114071969792 2397974054\n",
|
|
"2 1321025127388188673 2397974054\n",
|
|
"3 1321028108665950208 2810902381\n",
|
|
"4 1321029199998656513 223758384"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"tweets_stripped = tweets[[\"id\", \"user_id\"]]\n",
|
|
"tweets_stripped.head(5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "d2440bee",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<ipython-input-5-d55278231e86>:4: SettingWithCopyWarning: \n",
|
|
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
|
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
|
"\n",
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
" merged[\"connection\"] = merged[\"handle\"]+ \"->\"+ merged[\"tag\"]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"mentions_inter = mentions.merge(tweets_stripped, left_on=\"tweet_id\", right_on=\"id\")\n",
|
|
"mentions_inter2 = mentions_inter.merge(users, left_on=\"user_id\", right_on=\"id\")\n",
|
|
"merged = mentions_inter2[[\"tweet_id\", \"user_id\", \"handle\", \"tag\"]]\n",
|
|
"merged[\"connection\"] = merged[\"handle\"]+ \"->\"+ merged[\"tag\"]\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "95d3a09d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"23796"
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"merged2 = merged.groupby('connection').size().reset_index(name='count').sort_values(by='count', ascending=False)\n",
|
|
"merged2.size"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "aea02d5d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>tweet_id</th>\n",
|
|
" <th>user_id</th>\n",
|
|
" <th>handle</th>\n",
|
|
" <th>tag</th>\n",
|
|
" <th>connection</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1321021123463663616</td>\n",
|
|
" <td>778895426007203840</td>\n",
|
|
" <td>Polizei_OL</td>\n",
|
|
" <td>mahanna196</td>\n",
|
|
" <td>Polizei_OL->mahanna196</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1321037834246066181</td>\n",
|
|
" <td>778895426007203840</td>\n",
|
|
" <td>Polizei_OL</td>\n",
|
|
" <td>mahanna196</td>\n",
|
|
" <td>Polizei_OL->mahanna196</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>1321073940199100416</td>\n",
|
|
" <td>778895426007203840</td>\n",
|
|
" <td>Polizei_OL</td>\n",
|
|
" <td>Emma36166433</td>\n",
|
|
" <td>Polizei_OL->Emma36166433</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>1321096827937185793</td>\n",
|
|
" <td>778895426007203840</td>\n",
|
|
" <td>Polizei_OL</td>\n",
|
|
" <td>line9600</td>\n",
|
|
" <td>Polizei_OL->line9600</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>1321338058600570880</td>\n",
|
|
" <td>778895426007203840</td>\n",
|
|
" <td>Polizei_OL</td>\n",
|
|
" <td>Keroth_Katlatas</td>\n",
|
|
" <td>Polizei_OL->Keroth_Katlatas</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" tweet_id user_id handle tag \\\n",
|
|
"0 1321021123463663616 778895426007203840 Polizei_OL mahanna196 \n",
|
|
"1 1321037834246066181 778895426007203840 Polizei_OL mahanna196 \n",
|
|
"2 1321073940199100416 778895426007203840 Polizei_OL Emma36166433 \n",
|
|
"3 1321096827937185793 778895426007203840 Polizei_OL line9600 \n",
|
|
"4 1321338058600570880 778895426007203840 Polizei_OL Keroth_Katlatas \n",
|
|
"\n",
|
|
" connection \n",
|
|
"0 Polizei_OL->mahanna196 \n",
|
|
"1 Polizei_OL->mahanna196 \n",
|
|
"2 Polizei_OL->Emma36166433 \n",
|
|
"3 Polizei_OL->line9600 \n",
|
|
"4 Polizei_OL->Keroth_Katlatas "
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"merged.head(5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "75f75a62",
|
|
"metadata": {},
|
|
"source": [
|
|
"`connection_count` enthält noch \"Duplikate\", weil jede Zeile eine tweet_id trägt; `cc_wo_id` (connection count without id) ist darum bereinigt."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "375fc61c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"connection_count = pd.merge(merged2, merged, how=\"left\", on=\"connection\")\n",
|
|
"cc_wo_id = connection_count[[\"connection\", \"count\", \"user_id\", \"handle\", \"tag\"]]\n",
|
|
"cc_wo_id = cc_wo_id.drop_duplicates()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "d11693e8",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "KeyboardInterrupt",
|
|
"evalue": "",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
|
"\u001b[0;32m<ipython-input-9-dd5bbb2bd163>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mcop_net\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_node\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtitle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0mcop_net\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_node\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdst\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdst\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtitle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdst\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mcop_net\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_edge\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdst\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mneighbor_map\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcop_net\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_adj_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
"\u001b[0;32m/usr/local/lib/python3.8/dist-packages/pyvis/network.py\u001b[0m in \u001b[0;36madd_edge\u001b[0;34m(self, source, to, **options)\u001b[0m\n\u001b[1;32m 346\u001b[0m \u001b[0mdest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'to'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 347\u001b[0m if (\n\u001b[0;32m--> 348\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mdest\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mto\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mfrm\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 349\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0msource\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mfrm\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mto\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mdest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 350\u001b[0m ):\n",
|
|
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from pyvis.network import Network\n",
|
|
"\n",
|
|
"data = cc_wo_id.head(15000)\n",
|
|
"\n",
|
|
"cop_net = Network(height='500px', width='100%', bgcolor='#222222', font_color='white', notebook=True)\n",
|
|
"\n",
|
|
"cop_net.barnes_hut()\n",
|
|
"\n",
|
|
"sources = data['handle']\n",
|
|
"targets = data['tag']\n",
|
|
"weights = data['count']\n",
|
|
"\n",
|
|
"edge_data = zip(sources, targets, weights)\n",
|
|
"\n",
|
|
"for e in edge_data:\n",
|
|
" src = e[0]\n",
|
|
" dst = e[1]\n",
|
|
" w = e[2]\n",
|
|
"\n",
|
|
" cop_net.add_node(src, src, title=src)\n",
|
|
" cop_net.add_node(dst, dst, title=dst)\n",
|
|
" cop_net.add_edge(src, dst, value=w)\n",
|
|
"\n",
|
|
"neighbor_map = cop_net.get_adj_list()\n",
|
|
"\n",
|
|
"# add neighbor data to node hover data\n",
|
|
"for node in cop_net.nodes:\n",
|
|
" node['title'] += ' Neighbors:<br>' + '<br>'.join(neighbor_map[node['id']])\n",
|
|
" node['value'] = len(neighbor_map[node['id']])\n",
|
|
"\n",
|
|
"#cop_net.show('copnet.html')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "f706b956",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>connection</th>\n",
|
|
" <th>count</th>\n",
|
|
" <th>user_id</th>\n",
|
|
" <th>handle</th>\n",
|
|
" <th>tag</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Polizei_Ffm->Stadt_FFM</td>\n",
|
|
" <td>137</td>\n",
|
|
" <td>2272909014</td>\n",
|
|
" <td>Polizei_Ffm</td>\n",
|
|
" <td>Stadt_FFM</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>137</th>\n",
|
|
" <td>polizei_nrw_me->KreisMettmann</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>2389359068</td>\n",
|
|
" <td>polizei_nrw_me</td>\n",
|
|
" <td>KreisMettmann</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>269</th>\n",
|
|
" <td>Polizei_OHA->Polizei_GOE</td>\n",
|
|
" <td>82</td>\n",
|
|
" <td>773805850687340544</td>\n",
|
|
" <td>Polizei_OHA</td>\n",
|
|
" <td>Polizei_GOE</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>351</th>\n",
|
|
" <td>LkaBaWue->LkaBaWue</td>\n",
|
|
" <td>78</td>\n",
|
|
" <td>814455464394182656</td>\n",
|
|
" <td>LkaBaWue</td>\n",
|
|
" <td>LkaBaWue</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>429</th>\n",
|
|
" <td>Polizei_nrw_ms->Feuerwehr_MS</td>\n",
|
|
" <td>69</td>\n",
|
|
" <td>2284811875</td>\n",
|
|
" <td>Polizei_nrw_ms</td>\n",
|
|
" <td>Feuerwehr_MS</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>498</th>\n",
|
|
" <td>Polizei_KA->Natenom</td>\n",
|
|
" <td>65</td>\n",
|
|
" <td>3029998264</td>\n",
|
|
" <td>Polizei_KA</td>\n",
|
|
" <td>Natenom</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>563</th>\n",
|
|
" <td>Polizei_KA->axel_gutmann</td>\n",
|
|
" <td>61</td>\n",
|
|
" <td>3029998264</td>\n",
|
|
" <td>Polizei_KA</td>\n",
|
|
" <td>axel_gutmann</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>624</th>\n",
|
|
" <td>polizei_nrw_do->stadtdortmund</td>\n",
|
|
" <td>61</td>\n",
|
|
" <td>769128278</td>\n",
|
|
" <td>polizei_nrw_do</td>\n",
|
|
" <td>stadtdortmund</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>685</th>\n",
|
|
" <td>Polizei_nrw_ms->muenster_de</td>\n",
|
|
" <td>59</td>\n",
|
|
" <td>2284811875</td>\n",
|
|
" <td>Polizei_nrw_ms</td>\n",
|
|
" <td>muenster_de</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>744</th>\n",
|
|
" <td>polizei_nrw_bo->FW_Bochum</td>\n",
|
|
" <td>57</td>\n",
|
|
" <td>2389155192</td>\n",
|
|
" <td>polizei_nrw_bo</td>\n",
|
|
" <td>FW_Bochum</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" connection count user_id handle \\\n",
|
|
"0 Polizei_Ffm->Stadt_FFM 137 2272909014 Polizei_Ffm \n",
|
|
"137 polizei_nrw_me->KreisMettmann 132 2389359068 polizei_nrw_me \n",
|
|
"269 Polizei_OHA->Polizei_GOE 82 773805850687340544 Polizei_OHA \n",
|
|
"351 LkaBaWue->LkaBaWue 78 814455464394182656 LkaBaWue \n",
|
|
"429 Polizei_nrw_ms->Feuerwehr_MS 69 2284811875 Polizei_nrw_ms \n",
|
|
"498 Polizei_KA->Natenom 65 3029998264 Polizei_KA \n",
|
|
"563 Polizei_KA->axel_gutmann 61 3029998264 Polizei_KA \n",
|
|
"624 polizei_nrw_do->stadtdortmund 61 769128278 polizei_nrw_do \n",
|
|
"685 Polizei_nrw_ms->muenster_de 59 2284811875 Polizei_nrw_ms \n",
|
|
"744 polizei_nrw_bo->FW_Bochum 57 2389155192 polizei_nrw_bo \n",
|
|
"\n",
|
|
" tag \n",
|
|
"0 Stadt_FFM \n",
|
|
"137 KreisMettmann \n",
|
|
"269 Polizei_GOE \n",
|
|
"351 LkaBaWue \n",
|
|
"429 Feuerwehr_MS \n",
|
|
"498 Natenom \n",
|
|
"563 axel_gutmann \n",
|
|
"624 stadtdortmund \n",
|
|
"685 muenster_de \n",
|
|
"744 FW_Bochum "
|
|
]
|
|
},
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"cc_wo_id.head(10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "6de04c9f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"geo_neu = geo[[\"Polizei Account\", \"LAT\", \"LONG\"]]\n",
|
|
"geo_neu2 = geo[[\"Polizei Account\", \"LAT\", \"LONG\"]]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "237cf51b",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Polizei Account</th>\n",
|
|
" <th>LAT</th>\n",
|
|
" <th>LONG</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>bpol_11</td>\n",
|
|
" <td>-</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Polizei Account LAT LONG\n",
|
|
"0 bpol_11 - NaN"
|
|
]
|
|
},
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"geo_neu.head(1)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "c50a74a7",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<ipython-input-13-9989204024e1>:5: SettingWithCopyWarning: \n",
|
|
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
|
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
|
"\n",
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
" geo_neu['Polizei Account'] = geo_neu['Polizei Account'].str.lower()\n",
|
|
"<ipython-input-13-9989204024e1>:6: SettingWithCopyWarning: \n",
|
|
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
|
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
|
"\n",
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
" geo_neu['Polizei Account'] = geo_neu['Polizei Account'].str.strip()\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"cc_wo_id['handle'] = cc_wo_id['handle'].str.lower()\n",
|
|
"cc_wo_id['handle'] = cc_wo_id['handle'].str.strip()\n",
|
|
"cc_wo_id['tag'] = cc_wo_id['tag'].str.lower()\n",
|
|
"cc_wo_id['tag'] = cc_wo_id['tag'].str.strip()\n",
|
|
"geo_neu['Polizei Account'] = geo_neu['Polizei Account'].str.lower()\n",
|
|
"geo_neu['Polizei Account'] = geo_neu['Polizei Account'].str.strip()\n",
|
|
"merged_geo = pd.merge(cc_wo_id, geo_neu, how=\"left\", left_on=\"handle\", right_on=\"Polizei Account\")\n",
|
|
"merged_geo = merged_geo.rename(columns={\"LAT\": \"LAT1\", \"LONG\": \"LONG1\"})\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"id": "2f85d26d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>connection</th>\n",
|
|
" <th>count</th>\n",
|
|
" <th>user_id</th>\n",
|
|
" <th>handle</th>\n",
|
|
" <th>tag</th>\n",
|
|
" <th>Polizei Account</th>\n",
|
|
" <th>LAT1</th>\n",
|
|
" <th>LONG1</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Polizei_Ffm->Stadt_FFM</td>\n",
|
|
" <td>137</td>\n",
|
|
" <td>2272909014</td>\n",
|
|
" <td>polizei_ffm</td>\n",
|
|
" <td>stadt_ffm</td>\n",
|
|
" <td>polizei_ffm</td>\n",
|
|
" <td>50.110922</td>\n",
|
|
" <td>8.682127</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>polizei_nrw_me->KreisMettmann</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>2389359068</td>\n",
|
|
" <td>polizei_nrw_me</td>\n",
|
|
" <td>kreismettmann</td>\n",
|
|
" <td>polizei_nrw_me</td>\n",
|
|
" <td>51.2527778</td>\n",
|
|
" <td>6.9777778</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Polizei_OHA->Polizei_GOE</td>\n",
|
|
" <td>82</td>\n",
|
|
" <td>773805850687340544</td>\n",
|
|
" <td>polizei_oha</td>\n",
|
|
" <td>polizei_goe</td>\n",
|
|
" <td>polizei_oha</td>\n",
|
|
" <td>51.72784</td>\n",
|
|
" <td>10.2508204</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>LkaBaWue->LkaBaWue</td>\n",
|
|
" <td>78</td>\n",
|
|
" <td>814455464394182656</td>\n",
|
|
" <td>lkabawue</td>\n",
|
|
" <td>lkabawue</td>\n",
|
|
" <td>lkabawue</td>\n",
|
|
" <td>48.775846</td>\n",
|
|
" <td>9.182932</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Polizei_nrw_ms->Feuerwehr_MS</td>\n",
|
|
" <td>69</td>\n",
|
|
" <td>2284811875</td>\n",
|
|
" <td>polizei_nrw_ms</td>\n",
|
|
" <td>feuerwehr_ms</td>\n",
|
|
" <td>polizei_nrw_ms</td>\n",
|
|
" <td>51.9625101</td>\n",
|
|
" <td>7.6251879</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" connection count user_id handle \\\n",
|
|
"0 Polizei_Ffm->Stadt_FFM 137 2272909014 polizei_ffm \n",
|
|
"1 polizei_nrw_me->KreisMettmann 132 2389359068 polizei_nrw_me \n",
|
|
"2 Polizei_OHA->Polizei_GOE 82 773805850687340544 polizei_oha \n",
|
|
"3 LkaBaWue->LkaBaWue 78 814455464394182656 lkabawue \n",
|
|
"4 Polizei_nrw_ms->Feuerwehr_MS 69 2284811875 polizei_nrw_ms \n",
|
|
"\n",
|
|
" tag Polizei Account LAT1 LONG1 \n",
|
|
"0 stadt_ffm polizei_ffm 50.110922 8.682127 \n",
|
|
"1 kreismettmann polizei_nrw_me 51.2527778 6.9777778 \n",
|
|
"2 polizei_goe polizei_oha 51.72784 10.2508204 \n",
|
|
"3 lkabawue lkabawue 48.775846 9.182932 \n",
|
|
"4 feuerwehr_ms polizei_nrw_ms 51.9625101 7.6251879 "
|
|
]
|
|
},
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"merged_geo.head(5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"id": "8442525b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"merged_geo2 = pd.merge(merged_geo, geo_neu, how=\"left\", left_on=\"tag\", right_on=\"Polizei Account\")\n",
|
|
"merged_geo2 = merged_geo2.rename(columns={\"LAT\": \"LAT2\", \"LONG\": \"LONG2\"})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"id": "3defbeb9",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>connection</th>\n",
|
|
" <th>count</th>\n",
|
|
" <th>user_id</th>\n",
|
|
" <th>handle</th>\n",
|
|
" <th>tag</th>\n",
|
|
" <th>Polizei Account_x</th>\n",
|
|
" <th>LAT1</th>\n",
|
|
" <th>LONG1</th>\n",
|
|
" <th>Polizei Account_y</th>\n",
|
|
" <th>LAT2</th>\n",
|
|
" <th>LONG2</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Polizei_Ffm->Stadt_FFM</td>\n",
|
|
" <td>137</td>\n",
|
|
" <td>2272909014</td>\n",
|
|
" <td>polizei_ffm</td>\n",
|
|
" <td>stadt_ffm</td>\n",
|
|
" <td>polizei_ffm</td>\n",
|
|
" <td>50.110922</td>\n",
|
|
" <td>8.682127</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>polizei_nrw_me->KreisMettmann</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>2389359068</td>\n",
|
|
" <td>polizei_nrw_me</td>\n",
|
|
" <td>kreismettmann</td>\n",
|
|
" <td>polizei_nrw_me</td>\n",
|
|
" <td>51.2527778</td>\n",
|
|
" <td>6.9777778</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Polizei_OHA->Polizei_GOE</td>\n",
|
|
" <td>82</td>\n",
|
|
" <td>773805850687340544</td>\n",
|
|
" <td>polizei_oha</td>\n",
|
|
" <td>polizei_goe</td>\n",
|
|
" <td>polizei_oha</td>\n",
|
|
" <td>51.72784</td>\n",
|
|
" <td>10.2508204</td>\n",
|
|
" <td>polizei_goe</td>\n",
|
|
" <td>51.541280</td>\n",
|
|
" <td>9.915804</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>LkaBaWue->LkaBaWue</td>\n",
|
|
" <td>78</td>\n",
|
|
" <td>814455464394182656</td>\n",
|
|
" <td>lkabawue</td>\n",
|
|
" <td>lkabawue</td>\n",
|
|
" <td>lkabawue</td>\n",
|
|
" <td>48.775846</td>\n",
|
|
" <td>9.182932</td>\n",
|
|
" <td>lkabawue</td>\n",
|
|
" <td>48.775846</td>\n",
|
|
" <td>9.182932</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Polizei_nrw_ms->Feuerwehr_MS</td>\n",
|
|
" <td>69</td>\n",
|
|
" <td>2284811875</td>\n",
|
|
" <td>polizei_nrw_ms</td>\n",
|
|
" <td>feuerwehr_ms</td>\n",
|
|
" <td>polizei_nrw_ms</td>\n",
|
|
" <td>51.9625101</td>\n",
|
|
" <td>7.6251879</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" connection count user_id handle \\\n",
|
|
"0 Polizei_Ffm->Stadt_FFM 137 2272909014 polizei_ffm \n",
|
|
"1 polizei_nrw_me->KreisMettmann 132 2389359068 polizei_nrw_me \n",
|
|
"2 Polizei_OHA->Polizei_GOE 82 773805850687340544 polizei_oha \n",
|
|
"3 LkaBaWue->LkaBaWue 78 814455464394182656 lkabawue \n",
|
|
"4 Polizei_nrw_ms->Feuerwehr_MS 69 2284811875 polizei_nrw_ms \n",
|
|
"\n",
|
|
" tag Polizei Account_x LAT1 LONG1 Polizei Account_y \\\n",
|
|
"0 stadt_ffm polizei_ffm 50.110922 8.682127 NaN \n",
|
|
"1 kreismettmann polizei_nrw_me 51.2527778 6.9777778 NaN \n",
|
|
"2 polizei_goe polizei_oha 51.72784 10.2508204 polizei_goe \n",
|
|
"3 lkabawue lkabawue 48.775846 9.182932 lkabawue \n",
|
|
"4 feuerwehr_ms polizei_nrw_ms 51.9625101 7.6251879 NaN \n",
|
|
"\n",
|
|
" LAT2 LONG2 \n",
|
|
"0 NaN NaN \n",
|
|
"1 NaN NaN \n",
|
|
"2 51.541280 9.915804 \n",
|
|
"3 48.775846 9.182932 \n",
|
|
"4 NaN NaN "
|
|
]
|
|
},
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"merged_geo2.head(5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"id": "4dc51158",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>LAT1</th>\n",
|
|
" <th>LONG1</th>\n",
|
|
" <th>LAT2</th>\n",
|
|
" <th>LONG2</th>\n",
|
|
" <th>count</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>51.72784</td>\n",
|
|
" <td>10.2508204</td>\n",
|
|
" <td>51.541280</td>\n",
|
|
" <td>9.915804</td>\n",
|
|
" <td>82</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>48.775846</td>\n",
|
|
" <td>9.182932</td>\n",
|
|
" <td>48.775846</td>\n",
|
|
" <td>9.182932</td>\n",
|
|
" <td>78</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>22</th>\n",
|
|
" <td>48.3974003</td>\n",
|
|
" <td>9.9934336</td>\n",
|
|
" <td>48.775846</td>\n",
|
|
" <td>9.182932</td>\n",
|
|
" <td>36</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>23</th>\n",
|
|
" <td>52.120533</td>\n",
|
|
" <td>11.627624</td>\n",
|
|
" <td>51.0493286</td>\n",
|
|
" <td>13.7381437</td>\n",
|
|
" <td>35</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>24</th>\n",
|
|
" <td>52.1315889</td>\n",
|
|
" <td>11.6399609</td>\n",
|
|
" <td>52.6050782</td>\n",
|
|
" <td>11.8594279</td>\n",
|
|
" <td>34</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" LAT1 LONG1 LAT2 LONG2 count\n",
|
|
"2 51.72784 10.2508204 51.541280 9.915804 82\n",
|
|
"3 48.775846 9.182932 48.775846 9.182932 78\n",
|
|
"22 48.3974003 9.9934336 48.775846 9.182932 36\n",
|
|
"23 52.120533 11.627624 51.0493286 13.7381437 35\n",
|
|
"24 52.1315889 11.6399609 52.6050782 11.8594279 34"
|
|
]
|
|
},
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Keep only the four coordinate columns plus `count`, drop rows with missing\n",
|
|
"# values, and discard rows whose LAT1 or LAT2 holds the \"-\" placeholder.\n",
|
|
"geo_stripped = (\n",
|
|
"    merged_geo2[[\"LAT1\", \"LONG1\", \"LAT2\", \"LONG2\", \"count\"]]\n",
|
|
"    .dropna()\n",
|
|
"    .loc[lambda frame: (frame[\"LAT1\"] != \"-\") & (frame[\"LAT2\"] != \"-\")]\n",
|
|
")\n",
|
|
"geo_stripped.head(5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"id": "7267cf7f",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"LAT1 object\n",
|
|
"LONG1 object\n",
|
|
"LAT2 object\n",
|
|
"LONG2 object\n",
|
|
"count int64\n",
|
|
"dtype: object"
|
|
]
|
|
},
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"geo_stripped.dtypes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"id": "9bf0c75e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# The coordinate columns are object dtype at this point; cast all four to float\n",
|
|
"# in a single call and make sure `count` is an integer column.\n",
|
|
"geo_stripped = geo_stripped.astype(\n",
|
|
"    {\"LAT1\": float, \"LONG1\": float, \"LAT2\": float, \"LONG2\": float, \"count\": int}\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"id": "7206e2d7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Renumber the rows 0..n-1 so the later cells can address them by position.\n",
|
|
"# drop=True discards the old labels instead of storing them in an unused\n",
|
|
"# \"index\" column, so re-running this cell no longer stacks up extra columns\n",
|
|
"# (and eventually fails with \"cannot insert level_0, already exists\").\n",
|
|
"geo_stripped = geo_stripped.reset_index(drop=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"id": "67612718",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import folium\n",
|
|
"\n",
|
|
"# One ([LAT1, LONG1], [LAT2, LONG2]) pair per row of geo_stripped, in row order.\n",
|
|
"# The map cells draw each pair as a line.\n",
|
|
"point = [\n",
|
|
"    ([lat_a, lon_a], [lat_b, lon_b])\n",
|
|
"    for lat_a, lon_a, lat_b, lon_b in zip(\n",
|
|
"        geo_stripped[\"LAT1\"].to_numpy(),\n",
|
|
"        geo_stripped[\"LONG1\"].to_numpy(),\n",
|
|
"        geo_stripped[\"LAT2\"].to_numpy(),\n",
|
|
"        geo_stripped[\"LONG2\"].to_numpy(),\n",
|
|
"    )\n",
|
|
"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"id": "625c4037",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#ave_lat = sum(p[0] for p in point)/len(point)\n",
|
|
"#ave_lon = sum(p[1] for p in point)/len(point)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 81,
|
|
"id": "944eafa2",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# The map itself is created further below, centred on the first row's LAT1/LONG1 (not on average coordinates)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 82,
|
|
"id": "9e05a2ee",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>index</th>\n",
|
|
" <th>LAT1</th>\n",
|
|
" <th>LONG1</th>\n",
|
|
" <th>LAT2</th>\n",
|
|
" <th>LONG2</th>\n",
|
|
" <th>count</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>2</td>\n",
|
|
" <td>51.727840</td>\n",
|
|
" <td>10.250820</td>\n",
|
|
" <td>51.541280</td>\n",
|
|
" <td>9.915804</td>\n",
|
|
" <td>82</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>3</td>\n",
|
|
" <td>48.775846</td>\n",
|
|
" <td>9.182932</td>\n",
|
|
" <td>48.775846</td>\n",
|
|
" <td>9.182932</td>\n",
|
|
" <td>78</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>22</td>\n",
|
|
" <td>48.397400</td>\n",
|
|
" <td>9.993434</td>\n",
|
|
" <td>48.775846</td>\n",
|
|
" <td>9.182932</td>\n",
|
|
" <td>36</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>23</td>\n",
|
|
" <td>52.120533</td>\n",
|
|
" <td>11.627624</td>\n",
|
|
" <td>51.049329</td>\n",
|
|
" <td>13.738144</td>\n",
|
|
" <td>35</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>24</td>\n",
|
|
" <td>52.131589</td>\n",
|
|
" <td>11.639961</td>\n",
|
|
" <td>52.605078</td>\n",
|
|
" <td>11.859428</td>\n",
|
|
" <td>34</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" index LAT1 LONG1 LAT2 LONG2 count\n",
|
|
"0 2 51.727840 10.250820 51.541280 9.915804 82\n",
|
|
"1 3 48.775846 9.182932 48.775846 9.182932 78\n",
|
|
"2 22 48.397400 9.993434 48.775846 9.182932 36\n",
|
|
"3 23 52.120533 11.627624 51.049329 13.738144 35\n",
|
|
"4 24 52.131589 11.639961 52.605078 11.859428 34"
|
|
]
|
|
},
|
|
"execution_count": 82,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"geo_stripped.head(5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 83,
|
|
"id": "480d4bd8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"thickness = geo_stripped[\"count\"][0]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 96,
|
|
"id": "ac0dcc25",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<ipython-input-96-365be8339e30>:4: SettingWithCopyWarning: \n",
|
|
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
|
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
|
"\n",
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
" geo_handle[\"LAT\"] = geo_handle[\"LAT\"].astype(float)\n",
|
|
"<ipython-input-96-365be8339e30>:5: SettingWithCopyWarning: \n",
|
|
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
|
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
|
"\n",
|
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|
" geo_handle[\"LONG\"] = geo_handle[\"LONG\"].astype(float)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Label positions: take the account, name and coordinate columns and drop rows\n",
|
|
"# with missing values.\n",
|
|
"geo_handle_pre = geo[[\"Polizei Account\", \"Name\", \"LAT\", \"LONG\"]].dropna()\n",
|
|
"# Remove rows whose LAT is the \"-\" placeholder, then cast the coordinates.\n",
|
|
"# astype() returns a new frame, so nothing is assigned into a filtered slice any\n",
|
|
"# more (the per-column assignments raised SettingWithCopyWarning).\n",
|
|
"geo_handle = (\n",
|
|
"    geo_handle_pre[geo_handle_pre[\"LAT\"] != \"-\"]\n",
|
|
"    .astype({\"LAT\": float, \"LONG\": float})\n",
|
|
"    .reset_index()\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 99,
|
|
"id": "c2694f60",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import colorsys\n",
|
|
"import branca\n",
|
|
"import branca.colormap as cm\n",
|
|
"\n",
|
|
"# Centre the map on the first point of the first row. .iloc[0] reads by position,\n",
|
|
"# so it works whatever labels the index currently carries.\n",
|
|
"my_map = folium.Map(location=[geo_stripped[\"LAT1\"].iloc[0], geo_stripped[\"LONG1\"].iloc[0]], zoom_start=8)\n",
|
|
"\n",
|
|
"# Put every `Name` value on the map as a plain-text label at its coordinates.\n",
|
|
"# Iterating over the columns directly avoids mixing index labels with .iloc positions.\n",
|
|
"for text, name_lat, name_long in zip(geo_handle[\"Name\"], geo_handle[\"LAT\"], geo_handle[\"LONG\"]):\n",
|
|
"    folium.map.Marker(\n",
|
|
"        [name_lat, name_long],\n",
|
|
"        icon=folium.DivIcon(icon_size=(20,10),\n",
|
|
"                            icon_anchor=(0,0), html='<div style=\"font-size: 12pt\">%s</div>'%text)).add_to(my_map)\n",
|
|
"\n",
|
|
"# Colour scale for the lines, keyed on the `count` value.\n",
|
|
"colormap = cm.LinearColormap(colors=['lightblue', 'orange', 'red'], index=[0,5,60],vmin=0,vmax=90)\n",
|
|
"# Draw one line per entry of `point`; `count` sets both colour and line weight.\n",
|
|
"# zip pairs each line with its own row, so no manual row counter is needed.\n",
|
|
"for line, count in zip(point, geo_stripped[\"count\"]):\n",
|
|
"    thickness = int(count)\n",
|
|
"    folium.PolyLine(line, color=colormap(thickness), weight=thickness, opacity=0.8).add_to(my_map)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 100,
|
|
"id": "4865f7d4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Save map\n",
|
|
"my_map.save(\"./connections_with_name.html\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 101,
|
|
"id": "45ad95d5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Connection map without name labels, centred on the first point of the first row.\n",
|
|
"# .iloc[0] reads by position, so it works whatever labels the index currently carries.\n",
|
|
"my_map_ohne = folium.Map(location=[geo_stripped[\"LAT1\"].iloc[0], geo_stripped[\"LONG1\"].iloc[0]], zoom_start=8)\n",
|
|
"\n",
|
|
"# Colour scale for the lines, keyed on the `count` value.\n",
|
|
"colormap = cm.LinearColormap(colors=['lightblue', 'orange', 'red'], index=[0,5,60],vmin=0,vmax=90)\n",
|
|
"# Draw one line per entry of `point`; `count` sets both colour and line weight.\n",
|
|
"# zip pairs each line with its own row, so no manual row counter is needed.\n",
|
|
"for line, count in zip(point, geo_stripped[\"count\"]):\n",
|
|
"    thickness = int(count)\n",
|
|
"    folium.PolyLine(line, color=colormap(thickness), weight=thickness, opacity=0.8).add_to(my_map_ohne)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 102,
|
|
"id": "42212a74",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Save map\n",
|
|
"my_map_ohne.save(\"./connections_without_name.html\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d1d73f6d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|