Compare commits
No commits in common. "d05100d1f57bf6e7b89731c8e972b2654473fc86" and "19d21c91eed1270e32d54a5cc736795eb53bff1a" have entirely different histories.
d05100d1f5
...
19d21c91ee
6 changed files with 6 additions and 484889 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -160,6 +160,3 @@ cython_debug/
|
||||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
#.idea/
|
#.idea/
|
||||||
|
|
||||||
|
|
||||||
data/tfidf_matrix.pckl
|
|
||||||
data/tfidf_vectorizer.pckl
|
|
||||||
|
|
10
app.py
10
app.py
|
@ -2,7 +2,7 @@ from pathlib import Path
|
||||||
from typing import List
|
from typing import List
|
||||||
from shiny import App, ui, Inputs, Outputs, Session
|
from shiny import App, ui, Inputs, Outputs, Session
|
||||||
from shiny.types import NavSetArg
|
from shiny.types import NavSetArg
|
||||||
from src import mod_welcome, mod_searchable
|
from src import mod_welcome
|
||||||
from src.util import load_html_str_from_file
|
from src.util import load_html_str_from_file
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
@ -13,9 +13,8 @@ footer_html: str = load_html_str_from_file(os.path.join("www", "footer.html"))
|
||||||
|
|
||||||
def nav_controls() -> List[NavSetArg]:
|
def nav_controls() -> List[NavSetArg]:
|
||||||
return [
|
return [
|
||||||
ui.nav(ui.h5("Intro"), mod_welcome.welcome_ui("intro"), value="intro"),
|
ui.nav(ui.h5("Intro"), mod_welcome.welcome_ui("Intro"), value="intro"),
|
||||||
ui.nav(ui.h5("Analyse"), "Analyse"),
|
ui.nav(ui.h5("Datensatz Analyse"), "Datensatz Analyse"),
|
||||||
ui.nav(ui.h5("Suchmaschine"), mod_searchable.searchable_ui("search_engine"), value="search_engine"),
|
|
||||||
ui.nav_control(
|
ui.nav_control(
|
||||||
ui.a(
|
ui.a(
|
||||||
ui.h5("AG-Link"),
|
ui.h5("AG-Link"),
|
||||||
|
@ -58,8 +57,7 @@ app_ui = ui.page_navbar(
|
||||||
|
|
||||||
|
|
||||||
def server(input: Inputs, output: Outputs, session: Session):
|
def server(input: Inputs, output: Outputs, session: Session):
|
||||||
mod_welcome.welcome_server("intro")
|
mod_welcome.welcome_server("Intro")
|
||||||
mod_searchable.searchable_server("search_engine")
|
|
||||||
|
|
||||||
|
|
||||||
static_dir = Path(__file__).parent / "www"
|
static_dir = Path(__file__).parent / "www"
|
||||||
|
|
151691
data/tweet_relevance.json
151691
data/tweet_relevance.json
File diff suppressed because it is too large
Load diff
333095
data/tweets_all_combined.csv
333095
data/tweets_all_combined.csv
File diff suppressed because it is too large
Load diff
|
@ -1,93 +0,0 @@
|
||||||
from shiny import module, ui, render, Inputs, Outputs, Session
|
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
||||||
from sklearn.metrics.pairwise import cosine_similarity
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import pickle
|
|
||||||
|
|
||||||
tfidf_matrix_path = "data/tfidf_matrix.pckl"
|
|
||||||
tfidf_vectorizer_path = "data/tfidf_vectorizer.pckl"
|
|
||||||
relevance_score_path = "data/tweet_relevance.json"
|
|
||||||
tweets_path = "data/tweets_all_combined.csv"
|
|
||||||
|
|
||||||
|
|
||||||
print("Loading data from storage")
|
|
||||||
tweets = pd.read_csv(tweets_path)
|
|
||||||
relevance_score = pd.read_csv(relevance_score_path)
|
|
||||||
|
|
||||||
tfidf_matrix = None
|
|
||||||
with open(tfidf_matrix_path, "rb") as f:
|
|
||||||
tfidf_matrix = pickle.load(f)
|
|
||||||
|
|
||||||
tfidf_vectorizer: TfidfVectorizer = None
|
|
||||||
with open(tfidf_vectorizer_path, "rb") as f:
|
|
||||||
tfidf_vectorizer = pickle.load(f)
|
|
||||||
|
|
||||||
|
|
||||||
tweets["relevance_score"] = relevance_score["relevance_score"]
|
|
||||||
tweets = tweets.drop(["user_id", "measured_at", "tweet_id"], axis=1)
|
|
||||||
|
|
||||||
|
|
||||||
def search_query(query: str, limit: int = 5) -> pd.DataFrame:
|
|
||||||
query_vec = tfidf_vectorizer.transform([query])
|
|
||||||
similarity = cosine_similarity(query_vec, tfidf_matrix).flatten()
|
|
||||||
|
|
||||||
filtered = np.where(similarity != 0)[0]
|
|
||||||
indices = np.argsort(-similarity[filtered])
|
|
||||||
correct_indices = filtered[indices]
|
|
||||||
result = tweets.iloc[correct_indices]
|
|
||||||
|
|
||||||
if not len(result):
|
|
||||||
return None
|
|
||||||
|
|
||||||
overall = result['relevance_score'] * similarity[correct_indices]
|
|
||||||
return result.loc[overall.sort_values(ascending=False).index].head(limit)
|
|
||||||
|
|
||||||
|
|
||||||
@module.ui
|
|
||||||
def searchable_ui():
|
|
||||||
return ui.div(
|
|
||||||
ui.h2("Tweet Suchmaschine"),
|
|
||||||
ui.input_text("search_input", "Suche:", placeholder="Gebe Suchterm ein"),
|
|
||||||
ui.HTML("<br>"),
|
|
||||||
ui.output_ui(id="searchable_tweet_ui"),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@ module.server
|
|
||||||
def searchable_server(input: Inputs, output: Outputs, session: Session):
|
|
||||||
@output
|
|
||||||
@render.ui
|
|
||||||
def searchable_tweet_ui():
|
|
||||||
|
|
||||||
query = input.search_input()
|
|
||||||
|
|
||||||
result_pd = search_query(query, 15)
|
|
||||||
|
|
||||||
style = "text-align: center; padding-top: 0.5em;"
|
|
||||||
tweet_ui = ui.page_fluid()
|
|
||||||
|
|
||||||
if result_pd is None:
|
|
||||||
return ui.div(
|
|
||||||
ui.h5("Keine Ergebnisse gefunden!")
|
|
||||||
)
|
|
||||||
|
|
||||||
# iterating over dataframe is bad but needed
|
|
||||||
for idx, row in result_pd.iterrows():
|
|
||||||
tweet_ui.append(
|
|
||||||
ui.div(
|
|
||||||
ui.row(
|
|
||||||
ui.column(9, ui.markdown(f"**{row['user_name']}** *@{row['handle']}*"), style=style),
|
|
||||||
ui.column(3, ui.p(f"{row['created_at']}"), style=style),
|
|
||||||
),
|
|
||||||
ui.row(
|
|
||||||
ui.column(12, ui.HTML(str(row["tweet_text"]).replace("\\n", "<br>")), style=style + "font-size: 20px; padding:1em;"),
|
|
||||||
),
|
|
||||||
ui.row(
|
|
||||||
ui.column(3, ui.p(f"👍 {row['like_count']}"), style=style),
|
|
||||||
ui.column(3, ui.p(f"⟲ {row['retweet_count']}"), style=style),
|
|
||||||
ui.column(3, ui.p(f"↪ {row['reply_count']}"), style=style),
|
|
||||||
ui.column(3, ui.p(f"💬 {row['quote_count']}"), style=style),
|
|
||||||
), style="border: 1px solid #954; margin-bottom: 1em;"))
|
|
||||||
|
|
||||||
return tweet_ui
|
|
|
@ -18,6 +18,7 @@ def welcome_ui():
|
||||||
|
|
||||||
[0]: https://ag-link.xyz
|
[0]: https://ag-link.xyz
|
||||||
"""),
|
"""),
|
||||||
|
# ui.output_text("dataset_infos"),
|
||||||
ui.output_ui("dataset_infos"),
|
ui.output_ui("dataset_infos"),
|
||||||
ui.h3("Ursprung der Idee"),
|
ui.h3("Ursprung der Idee"),
|
||||||
ui.markdown("""
|
ui.markdown("""
|
||||||
|
@ -51,7 +52,7 @@ with open("data/general_analysis_results.json", "r") as f:
|
||||||
|
|
||||||
|
|
||||||
@ module.server
|
@ module.server
|
||||||
def welcome_server(input, output, session):
|
def welcome_server(input, output, session, starting_value=0):
|
||||||
@output
|
@output
|
||||||
@render.ui
|
@render.ui
|
||||||
def dataset_infos():
|
def dataset_infos():
|
||||||
|
|
Loading…
Reference in a new issue