Adds Node to precalculate JSON files for visualisations

This commit is contained in:
quorploop 2026-01-29 22:08:01 +01:00
parent 7c2e34906e
commit d9d0441ddd
3 changed files with 194 additions and 18 deletions

View file

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 12,
"id": "0ab5f064",
"metadata": {},
"outputs": [
@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 13,
"id": "94b2e3d9",
"metadata": {},
"outputs": [
@ -62,7 +62,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 14,
"id": "b3924728",
"metadata": {},
"outputs": [],
@ -76,7 +76,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 15,
"id": "c0fdb0ba",
"metadata": {},
"outputs": [],
@ -89,7 +89,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 16,
"id": "df5c31b3",
"metadata": {},
"outputs": [],
@ -102,7 +102,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 17,
"id": "101b971d",
"metadata": {},
"outputs": [],
@ -124,7 +124,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 18,
"id": "2f23046d",
"metadata": {},
"outputs": [],
@ -151,7 +151,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 19,
"id": "d4ae65f1",
"metadata": {
"vscode": {
@ -171,7 +171,7 @@
" JOIN tags t1 ON t1.id = pt1.tag_id\n",
" JOIN tags t2 ON t2.id = pt2.tag_id\n",
" GROUP BY t1.tag, t2.tag\n",
" HAVING weight > 3\n",
" HAVING weight > 1\n",
" ORDER BY weight DESC;\n",
"\"\"\")\n",
"\n",
@ -181,7 +181,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 20,
"id": "13062474",
"metadata": {
"vscode": {
@ -192,24 +192,27 @@
"source": [
"q = query_db(\"\"\"\n",
"select\n",
"round(umap_x, 3) as umap_x,\n",
"round(umap_y, 3) as umap_y,\n",
"round(umap_z, 3) as umap_z,\n",
"posts.id, title\n",
"cast(umap_x*10 as int) as x,\n",
"cast(umap_y*10 as int) as y,\n",
"cast(umap_z*10 as int) as z,\n",
"posts.id as id, category_id as c,\n",
"SUBSTRING(title, 1, 12) as t\n",
"\n",
"from posts\n",
"inner join postcategories on post_id = posts.id\n",
"inner join categories on category_id = categories.id\n",
"where date > '2020-01-01' and categories.category IN ('Theorie und Diskussion', 'Praxis')\n",
"\n",
"\"\"\")\n",
"\n",
"#where date > '2020-01-01' and categories.category NOT IN ('Presseartikel')\n",
"\n",
"with open('json/umap_embeddings.json', 'w') as f:\n",
" f.write(json.dumps(q))"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 21,
"id": "e5378b17",
"metadata": {
"vscode": {
@ -239,7 +242,7 @@
"SELECT \n",
" tld AS source, \n",
" CASE \n",
" WHEN host_count < 15 THEN 'other'\n",
" WHEN host_count < 10 THEN 'other'\n",
" ELSE host \n",
" END AS target, \n",
" SUM(host_count) AS value\n",
@ -249,7 +252,7 @@
" WHERE tld IS NOT NULL AND host IS NOT NULL \n",
" GROUP BY tld, host\n",
")\n",
"WHERE source != \"\"\n",
"WHERE source != \"\" AND target != 'other'\n",
"GROUP BY tld, target\n",
"\"\"\")\n",
"\n",
@ -259,6 +262,61 @@
"with open('json/urls_l2.json', 'w') as f:\n",
" f.write(json.dumps(q2))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "1501cb06",
"metadata": {
"vscode": {
"languageId": "ruby"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[{'author_name': 'Antifa', 'tag': 'Antifaschismus', 'tag_count': 9},\n",
" {'author_name': 'Antifa', 'tag': 'Antirassismus', 'tag_count': 4},\n",
" {'author_name': 'Antifa', 'tag': 'Antisemitismus', 'tag_count': 4},\n",
" {'author_name': 'Antifa', 'tag': 'Feminismus', 'tag_count': 3},\n",
" {'author_name': 'Antifa', 'tag': 'Recherche', 'tag_count': 3},\n",
" {'author_name': 'Antifa', 'tag': 'Antisexismus', 'tag_count': 3},\n",
" {'author_name': 'Antifa', 'tag': 'Repression', 'tag_count': 2},\n",
" {'author_name': 'Antifa', 'tag': 'Diskussion', 'tag_count': 2},\n",
" {'author_name': 'Antifa', 'tag': 'Medien', 'tag_count': 2},\n",
" {'author_name': 'Antifa', 'tag': 'Solidarität', 'tag_count': 1},\n",
" {'author_name': 'Antifa', 'tag': 'Freiräume', 'tag_count': 1},\n",
" {'author_name': 'Antifa', 'tag': 'Knast', 'tag_count': 1},\n",
" {'author_name': 'Antifa', 'tag': 'Militanz', 'tag_count': 1},\n",
" {'author_name': 'Antifa', 'tag': 'Nationalsozialismus', 'tag_count': 1},\n",
" {'author_name': 'Antifa', 'tag': 'Auswertung', 'tag_count': 1},\n",
" {'author_name': 'Antifa', 'tag': 'Demonstration', 'tag_count': 1},\n",
" {'author_name': 'Antifa', 'tag': 'Krieg', 'tag_count': 1}]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = query_db(\"\"\"\n",
" SELECT \n",
" a.name AS author_name,\n",
" t.tag,\n",
" COUNT(*) AS tag_count\n",
"FROM authors a\n",
"JOIN post_authors pa ON a.id = pa.author_id\n",
"JOIN posttags pt ON pa.post_id = pt.post_id\n",
"JOIN tags t ON pt.tag_id = t.id\n",
"WHERE a.name = 'Antifa'\n",
"GROUP BY a.id, a.name, t.id, t.tag\n",
"ORDER BY tag_count DESC;\n",
"\"\"\")\n",
"\n",
"q"
]
}
],
"metadata": {