From 5acc0642b60d7a77952e8dbfbdf42523176bb042 Mon Sep 17 00:00:00 2001 From: procrastimax Date: Wed, 12 Jul 2023 13:13:31 +0200 Subject: [PATCH] Improves usability --- app.py | 2 +- src/mod_searchable.py | 55 +++++++++++++++++++++++++++++-------------- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/app.py b/app.py index 4c0fba7..fc029d2 100644 --- a/app.py +++ b/app.py @@ -43,7 +43,7 @@ app_ui = ui.page_navbar( ), align="right", ), - selected="intro", + selected="search_engine", fluid=False, title=ui.div(ui.img(src="favicon.ico", width="75dpi", height="75dpi"), ui.h1("Copbird")), diff --git a/src/mod_searchable.py b/src/mod_searchable.py index 509f4be..c59b142 100644 --- a/src/mod_searchable.py +++ b/src/mod_searchable.py @@ -4,6 +4,7 @@ from sklearn.metrics.pairwise import cosine_similarity import pandas as pd import numpy as np import pickle +import re tfidf_matrix_path = "data/tfidf_matrix.pckl" tfidf_vectorizer_path = "data/tfidf_vectorizer.pckl" @@ -13,6 +14,20 @@ tweets_path = "data/tweets_all_combined.csv" reply_html_svg = '' retweet_html_svg = '' like_html_svg = '' +quote_count_svg = ' ' + +link_regex = r"(https?://\S+)" +hashtag_regex = r"#(\w+)" + + +def replace_link(match): + url = match.group(0) + return f'{url}' + +def replace_hastag(match): + hashtag = match.group(0) + name = str(hashtag).removeprefix("#") + return f'{hashtag}' print("Loading data from storage") @@ -44,11 +59,11 @@ def search_query(query: str, limit: int = 5) -> pd.DataFrame: if not len(result): return None - overall = result['relevance_score'] * similarity[correct_indices] + overall = (0.6 * result['relevance_score']) * similarity[correct_indices] return result.loc[overall.sort_values(ascending=False).index].head(limit) -@module.ui +@ module.ui def searchable_ui(): return ui.div( ui.h2("Tweet Suchmaschine"), @@ -60,15 +75,15 @@ def searchable_ui(): @ module.server def searchable_server(input: Inputs, output: Outputs, session: Session): - @output - @render.ui + @ output + @ render.ui def searchable_tweet_ui(): query = input.search_input() - result_pd = search_query(query, 15) + result_pd = search_query(query, 25) - style = "text-align: center; padding-top: 0.5em;" + style = "text-align: center;" tweet_ui = ui.page_fluid() if result_pd is None: @@ -78,27 +93,31 @@ def searchable_server(input: Inputs, output: Outputs, session: Session): # iterating over dataframe is bad but needed for idx, row in result_pd.iterrows(): + + # prettify tweet text + tweet_text = str(row["tweet_text"]).replace("\\n", "
") + tweet_text = re.sub(link_regex, replace_link, tweet_text) + tweet_text = re.sub(hashtag_regex, replace_hastag, tweet_text) + tweet_ui.append( ui.div( ui.row( - ui.column(9, ui.markdown( - f"**{row['user_name']}** *@{row['handle']}*"), style=style), - ui.column(3, ui.p(f"{row['created_at']}"), style=style), + ui.column(6, ui.markdown( + f"**{row['user_name']}** *@{row['handle']}*"), style=style + "padding-top: 0.5em;"), + ui.column(6, ui.p(f"{row['created_at']}"), style=style + "padding-top: 0.5em;"), ), ui.row( - ui.column(12, ui.HTML(str(row["tweet_text"]).replace( - "\\n", "
")), style=style + "font-size: 20px; padding:1em;"), + ui.column(12, ui.div(ui.HTML(tweet_text), style=style + "font-size: 20px; margin: 1em; padding: 0.25em; border: 1px solid #bbb;")), ), ui.row( ui.column(3, ui.HTML(reply_html_svg), ui.p( - f"{row['reply_count']}"), style=style), + f"{row['reply_count']}"), style=style, title="Anzahl Antworten"), ui.column(3, ui.HTML(retweet_html_svg), ui.p( - f"{row['retweet_count']}"), style=style), + f"{row['retweet_count']}"), style=style, title="Anzahl Retweets"), ui.column(3, ui.HTML(like_html_svg), ui.p( - f"{row['like_count']}"), style=style), - # quote_count: . Indicates approximately how many times this Tweet has been quoted by Twitter users. Example: - # TODO: use a nice svg for quote_count - ui.column(3, ui.p(f"Quote Count: {row['quote_count']}"), style=style), - ), style="border: 1px solid #954; margin-bottom: 1em;")) + f"{row['like_count']}"), style=style, title="Anzahl Likes"), + ui.column(3, ui.HTML(quote_count_svg), ui.p( + f"{row['quote_count']}"), style=style, title="Anzahl Quotes"), + ), style="border: 2px solid #119; margin-bottom: 1.5em; border-radius: 10px;")) return tweet_ui