Adds Node to precalculate jsons for visualisations
This commit is contained in:
parent
7c2e34906e
commit
d9d0441ddd
3 changed files with 194 additions and 18 deletions
|
|
@ -2,7 +2,7 @@
|
|||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 12,
|
||||
"id": "0ab5f064",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
|
@ -24,7 +24,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 13,
|
||||
"id": "94b2e3d9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
|
@ -62,7 +62,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 14,
|
||||
"id": "b3924728",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
@ -76,7 +76,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 15,
|
||||
"id": "c0fdb0ba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
@ -89,7 +89,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 16,
|
||||
"id": "df5c31b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
@ -102,7 +102,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 17,
|
||||
"id": "101b971d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
@ -124,7 +124,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 18,
|
||||
"id": "2f23046d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
@ -151,7 +151,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 19,
|
||||
"id": "d4ae65f1",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
|
|
@ -171,7 +171,7 @@
|
|||
" JOIN tags t1 ON t1.id = pt1.tag_id\n",
|
||||
" JOIN tags t2 ON t2.id = pt2.tag_id\n",
|
||||
" GROUP BY t1.tag, t2.tag\n",
|
||||
" HAVING weight > 3\n",
|
||||
" HAVING weight > 1\n",
|
||||
" ORDER BY weight DESC;\n",
|
||||
"\"\"\")\n",
|
||||
"\n",
|
||||
|
|
@ -181,7 +181,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 20,
|
||||
"id": "13062474",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
|
|
@ -192,24 +192,27 @@
|
|||
"source": [
|
||||
"q = query_db(\"\"\"\n",
|
||||
"select\n",
|
||||
"round(umap_x, 3) as umap_x,\n",
|
||||
"round(umap_y, 3) as umap_y,\n",
|
||||
"round(umap_z, 3) as umap_z,\n",
|
||||
"posts.id, title\n",
|
||||
"cast(umap_x*10 as int) as x,\n",
|
||||
"cast(umap_y*10 as int) as y,\n",
|
||||
"cast(umap_z*10 as int) as z,\n",
|
||||
"posts.id as id, category_id as c,\n",
|
||||
"SUBSTRING(title, 1, 12) as t\n",
|
||||
"\n",
|
||||
"from posts\n",
|
||||
"inner join postcategories on post_id = posts.id\n",
|
||||
"inner join categories on category_id = categories.id\n",
|
||||
"where date > '2020-01-01' and categories.category IN ('Theorie und Diskussion', 'Praxis')\n",
|
||||
"\n",
|
||||
"\"\"\")\n",
|
||||
"\n",
|
||||
"#where date > '2020-01-01' and categories.category NOT IN ('Presseartikel')\n",
|
||||
"\n",
|
||||
"with open('json/umap_embeddings.json', 'w') as f:\n",
|
||||
" f.write(json.dumps(q))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 21,
|
||||
"id": "e5378b17",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
|
|
@ -239,7 +242,7 @@
|
|||
"SELECT \n",
|
||||
" tld AS source, \n",
|
||||
" CASE \n",
|
||||
" WHEN host_count < 15 THEN 'other'\n",
|
||||
" WHEN host_count < 10 THEN 'other'\n",
|
||||
" ELSE host \n",
|
||||
" END AS target, \n",
|
||||
" SUM(host_count) AS value\n",
|
||||
|
|
@ -249,7 +252,7 @@
|
|||
" WHERE tld IS NOT NULL AND host IS NOT NULL \n",
|
||||
" GROUP BY tld, host\n",
|
||||
")\n",
|
||||
"WHERE source != \"\"\n",
|
||||
"WHERE source != \"\" AND target != 'other'\n",
|
||||
"GROUP BY tld, target\n",
|
||||
"\"\"\")\n",
|
||||
"\n",
|
||||
|
|
@ -259,6 +262,61 @@
|
|||
"with open('json/urls_l2.json', 'w') as f:\n",
|
||||
" f.write(json.dumps(q2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "1501cb06",
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "ruby"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'author_name': 'Antifa', 'tag': 'Antifaschismus', 'tag_count': 9},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Antirassismus', 'tag_count': 4},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Antisemitismus', 'tag_count': 4},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Feminismus', 'tag_count': 3},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Recherche', 'tag_count': 3},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Antisexismus', 'tag_count': 3},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Repression', 'tag_count': 2},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Diskussion', 'tag_count': 2},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Medien', 'tag_count': 2},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Solidarität', 'tag_count': 1},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Freiräume', 'tag_count': 1},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Knast', 'tag_count': 1},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Militanz', 'tag_count': 1},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Nationalsozialismus', 'tag_count': 1},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Auswertung', 'tag_count': 1},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Demonstration', 'tag_count': 1},\n",
|
||||
" {'author_name': 'Antifa', 'tag': 'Krieg', 'tag_count': 1}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"q = query_db(\"\"\"\n",
|
||||
" SELECT \n",
|
||||
" a.name AS author_name,\n",
|
||||
" t.tag,\n",
|
||||
" COUNT(*) AS tag_count\n",
|
||||
"FROM authors a\n",
|
||||
"JOIN post_authors pa ON a.id = pa.author_id\n",
|
||||
"JOIN posttags pt ON pa.post_id = pt.post_id\n",
|
||||
"JOIN tags t ON pt.tag_id = t.id\n",
|
||||
"WHERE a.name = 'Antifa'\n",
|
||||
"GROUP BY a.id, a.name, t.id, t.tag\n",
|
||||
"ORDER BY tag_count DESC;\n",
|
||||
"\"\"\")\n",
|
||||
"\n",
|
||||
"q"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue