Improves usability

This commit is contained in:
procrastimax 2023-07-12 13:13:31 +02:00
parent 933e1817ca
commit 5acc0642b6
2 changed files with 38 additions and 19 deletions

2
app.py
View file

@ -43,7 +43,7 @@ app_ui = ui.page_navbar(
), ),
align="right", align="right",
), ),
selected="intro", selected="search_engine",
fluid=False, fluid=False,
title=ui.div(ui.img(src="favicon.ico", width="75dpi", height="75dpi"), title=ui.div(ui.img(src="favicon.ico", width="75dpi", height="75dpi"),
ui.h1("Copbird")), ui.h1("Copbird")),

View file

@ -4,6 +4,7 @@ from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import pickle import pickle
import re
tfidf_matrix_path = "data/tfidf_matrix.pckl" tfidf_matrix_path = "data/tfidf_matrix.pckl"
tfidf_vectorizer_path = "data/tfidf_vectorizer.pckl" tfidf_vectorizer_path = "data/tfidf_vectorizer.pckl"
@ -13,6 +14,20 @@ tweets_path = "data/tweets_all_combined.csv"
reply_html_svg = '<svg width="18px" height="18px" viewBox="0 0 24 24" aria-hidden="true"><g><path d="M1.751 10c0-4.42 3.584-8 8.005-8h4.366c4.49 0 8.129 3.64 8.129 8.13 0 2.96-1.607 5.68-4.196 7.11l-8.054 4.46v-3.69h-.067c-4.49.1-8.183-3.51-8.183-8.01zm8.005-6c-3.317 0-6.005 2.69-6.005 6 0 3.37 2.77 6.08 6.138 6.01l.351-.01h1.761v2.3l5.087-2.81c1.951-1.08 3.163-3.13 3.163-5.36 0-3.39-2.744-6.13-6.129-6.13H9.756z"></path></g></svg>' reply_html_svg = '<svg width="18px" height="18px" viewBox="0 0 24 24" aria-hidden="true"><g><path d="M1.751 10c0-4.42 3.584-8 8.005-8h4.366c4.49 0 8.129 3.64 8.129 8.13 0 2.96-1.607 5.68-4.196 7.11l-8.054 4.46v-3.69h-.067c-4.49.1-8.183-3.51-8.183-8.01zm8.005-6c-3.317 0-6.005 2.69-6.005 6 0 3.37 2.77 6.08 6.138 6.01l.351-.01h1.761v2.3l5.087-2.81c1.951-1.08 3.163-3.13 3.163-5.36 0-3.39-2.744-6.13-6.129-6.13H9.756z"></path></g></svg>'
retweet_html_svg = '<svg width="18px" height="18px" viewBox="0 0 24 24" aria-hidden="true"><g><path d="M4.5 3.88l4.432 4.14-1.364 1.46L5.5 7.55V16c0 1.1.896 2 2 2H13v2H7.5c-2.209 0-4-1.79-4-4V7.55L1.432 9.48.068 8.02 4.5 3.88zM16.5 6H11V4h5.5c2.209 0 4 1.79 4 4v8.45l2.068-1.93 1.364 1.46-4.432 4.14-4.432-4.14 1.364-1.46 2.068 1.93V8c0-1.1-.896-2-2-2z"></path></g></svg>' retweet_html_svg = '<svg width="18px" height="18px" viewBox="0 0 24 24" aria-hidden="true"><g><path d="M4.5 3.88l4.432 4.14-1.364 1.46L5.5 7.55V16c0 1.1.896 2 2 2H13v2H7.5c-2.209 0-4-1.79-4-4V7.55L1.432 9.48.068 8.02 4.5 3.88zM16.5 6H11V4h5.5c2.209 0 4 1.79 4 4v8.45l2.068-1.93 1.364 1.46-4.432 4.14-4.432-4.14 1.364-1.46 2.068 1.93V8c0-1.1-.896-2-2-2z"></path></g></svg>'
like_html_svg = '<svg width="18px" height="18px" viewBox="0 0 24 24" aria-hidden="true"><g><path d="M16.697 5.5c-1.222-.06-2.679.51-3.89 2.16l-.805 1.09-.806-1.09C9.984 6.01 8.526 5.44 7.304 5.5c-1.243.07-2.349.78-2.91 1.91-.552 1.12-.633 2.78.479 4.82 1.074 1.97 3.257 4.27 7.129 6.61 3.87-2.34 6.052-4.64 7.126-6.61 1.111-2.04 1.03-3.7.477-4.82-.561-1.13-1.666-1.84-2.908-1.91zm4.187 7.69c-1.351 2.48-4.001 5.12-8.379 7.67l-.503.3-.504-.3c-4.379-2.55-7.029-5.19-8.382-7.67-1.36-2.5-1.41-4.86-.514-6.67.887-1.79 2.647-2.91 4.601-3.01 1.651-.09 3.368.56 4.798 2.01 1.429-1.45 3.146-2.1 4.796-2.01 1.954.1 3.714 1.22 4.601 3.01.896 1.81.846 4.17-.514 6.67z"></path></g></svg>' like_html_svg = '<svg width="18px" height="18px" viewBox="0 0 24 24" aria-hidden="true"><g><path d="M16.697 5.5c-1.222-.06-2.679.51-3.89 2.16l-.805 1.09-.806-1.09C9.984 6.01 8.526 5.44 7.304 5.5c-1.243.07-2.349.78-2.91 1.91-.552 1.12-.633 2.78.479 4.82 1.074 1.97 3.257 4.27 7.129 6.61 3.87-2.34 6.052-4.64 7.126-6.61 1.111-2.04 1.03-3.7.477-4.82-.561-1.13-1.666-1.84-2.908-1.91zm4.187 7.69c-1.351 2.48-4.001 5.12-8.379 7.67l-.503.3-.504-.3c-4.379-2.55-7.029-5.19-8.382-7.67-1.36-2.5-1.41-4.86-.514-6.67.887-1.79 2.647-2.91 4.601-3.01 1.651-.09 3.368.56 4.798 2.01 1.429-1.45 3.146-2.1 4.796-2.01 1.954.1 3.714 1.22 4.601 3.01.896 1.81.846 4.17-.514 6.67z"></path></g></svg>'
quote_count_svg = '<svg width="18px" height="18px" viewBox="0 0 57 57"><g stroke-width="0"></g><g stroke-linecap="round" stroke-linejoin="round"></g><g> <g> <circle cx="18.5" cy="31.5" r="5.5"></circle> <path d="M18.5,38c-3.584,0-6.5-2.916-6.5-6.5s2.916-6.5,6.5-6.5s6.5,2.916,6.5,6.5S22.084,38,18.5,38z M18.5,27c-2.481,0-4.5,2.019-4.5,4.5s2.019,4.5,4.5,4.5s4.5-2.019,4.5-4.5S20.981,27,18.5,27z"></path> </g> <g> <circle cx="35.5" cy="31.5" r="5.5"></circle> <path d="M35.5,38c-3.584,0-6.5-2.916-6.5-6.5s2.916-6.5,6.5-6.5s6.5,2.916,6.5,6.5S39.084,38,35.5,38z M35.5,27c-2.481,0-4.5,2.019-4.5,4.5s2.019,4.5,4.5,4.5s4.5-2.019,4.5-4.5S37.981,27,35.5,27z"></path> </g> <path d="M13,32c-0.553,0-1-0.447-1-1c0-7.72,6.28-14,14-14c0.553,0,1,0.447,1,1s-0.447,1-1,1 c-6.617,0-12,5.383-12,12C14,31.553,13.553,32,13,32z"></path> <path d="M30,32c-0.553,0-1-0.447-1-1c0-7.72,6.28-14,14-14c0.553,0,1,0.447,1,1s-0.447,1-1,1 c-6.617,0-12,5.383-12,12C31,31.553,30.553,32,30,32z"></path> </g></svg>'
# Matches "http://" or "https://" followed by everything up to the next
# whitespace character; the whole URL is captured as group 1.
link_regex = r"(https?://\S+)"
# Matches "#" followed by one or more word characters; group 1 is the tag
# name without the leading "#".
hashtag_regex = r"#(\w+)"
def replace_link(match):
    """Turn a matched URL into an HTML anchor (``re.sub`` callback).

    The matched text is cut at the first ``<``, ``>`` or ``"``. None of
    these can appear in a URL, but a greedy "non-whitespace" pattern will
    swallow them: a ``"`` would close the ``href`` attribute early, and a
    ``<br>`` inserted before linkifying would otherwise end up inside the
    link. Everything after the cut is emitted unchanged behind the anchor,
    except that further URLs in it are linkified with the same pattern.

    Args:
        match: The ``re.Match`` for one URL occurrence.

    Returns:
        The replacement string: ``<a href="URL">URL</a>`` followed by any
        trailing text that was part of the match but not of the URL.
    """
    matched = match.group(0)
    # Index of the first character that cannot belong to a URL in HTML.
    cut = min(
        (pos for pos, char in enumerate(matched) if char in '<>"'),
        default=len(matched),
    )
    if cut == 0:
        # No URL part in front of the delimiter; leave the text untouched.
        return matched
    url, rest = matched[:cut], matched[cut:]
    if rest:
        # The remainder is strictly shorter than the match, so this
        # recursion terminates. It catches e.g. "<br>https://next.link".
        rest = match.re.sub(replace_link, rest)
    return f'<a href="{url}">{url}</a>{rest}'
def replace_hastag(match):
    """Turn a matched hashtag into a Twitter search link (``re.sub`` callback).

    The visible link text is the hashtag exactly as written. The tag name
    is percent-encoded for the query string, so hashtags with non-ASCII
    word characters (e.g. umlauts) produce a valid URL. Plain ASCII tags
    produce the same output as before.

    Args:
        match: The ``re.Match`` for one hashtag occurrence, including the
            leading ``#``.

    Returns:
        An ``<a>`` element linking to the Twitter search for the hashtag.
    """
    # Local import keeps this block self-contained; urllib is stdlib.
    from urllib.parse import quote

    hashtag = str(match.group(0))
    # "%23" in the URL is the encoded "#", so only the bare name is quoted.
    name = quote(hashtag.removeprefix("#"), safe="")
    return f'<a href="https://twitter.com/search?q=%23{name}" style="text-decoration:none">{hashtag}</a>'
print("Loading data from storage") print("Loading data from storage")
@ -44,7 +59,7 @@ def search_query(query: str, limit: int = 5) -> pd.DataFrame:
if not len(result): if not len(result):
return None return None
overall = result['relevance_score'] * similarity[correct_indices] overall = (0.6 * result['relevance_score']) * similarity[correct_indices]
return result.loc[overall.sort_values(ascending=False).index].head(limit) return result.loc[overall.sort_values(ascending=False).index].head(limit)
@ -66,9 +81,9 @@ def searchable_server(input: Inputs, output: Outputs, session: Session):
query = input.search_input() query = input.search_input()
result_pd = search_query(query, 15) result_pd = search_query(query, 25)
style = "text-align: center; padding-top: 0.5em;" style = "text-align: center;"
tweet_ui = ui.page_fluid() tweet_ui = ui.page_fluid()
if result_pd is None: if result_pd is None:
@ -78,27 +93,31 @@ def searchable_server(input: Inputs, output: Outputs, session: Session):
# iterating over dataframe is bad but needed # iterating over dataframe is bad but needed
for idx, row in result_pd.iterrows(): for idx, row in result_pd.iterrows():
# prettify tweet text
tweet_text = str(row["tweet_text"]).replace("\\n", "<br>")
tweet_text = re.sub(link_regex, replace_link, tweet_text)
tweet_text = re.sub(hashtag_regex, replace_hastag, tweet_text)
tweet_ui.append( tweet_ui.append(
ui.div( ui.div(
ui.row( ui.row(
ui.column(9, ui.markdown( ui.column(6, ui.markdown(
f"**{row['user_name']}***@{row['handle']}*"), style=style), f"**{row['user_name']}***@{row['handle']}*"), style=style + "padding-top: 0.5em;"),
ui.column(3, ui.p(f"{row['created_at']}"), style=style), ui.column(6, ui.p(f"{row['created_at']}"), style=style + "padding-top: 0.5em;"),
), ),
ui.row( ui.row(
ui.column(12, ui.HTML(str(row["tweet_text"]).replace( ui.column(12, ui.div(ui.HTML(tweet_text), style=style + "font-size: 20px; margin: 1em; padding: 0.25em; border: 1px solid #bbb;")),
"\\n", "<br>")), style=style + "font-size: 20px; padding:1em;"),
), ),
ui.row( ui.row(
ui.column(3, ui.HTML(reply_html_svg), ui.p( ui.column(3, ui.HTML(reply_html_svg), ui.p(
f"{row['reply_count']}"), style=style), f"{row['reply_count']}"), style=style, title="Anzahl Antworten"),
ui.column(3, ui.HTML(retweet_html_svg), ui.p( ui.column(3, ui.HTML(retweet_html_svg), ui.p(
f"{row['retweet_count']}"), style=style), f"{row['retweet_count']}"), style=style, title="Anzahl Retweets"),
ui.column(3, ui.HTML(like_html_svg), ui.p( ui.column(3, ui.HTML(like_html_svg), ui.p(
f"{row['like_count']}"), style=style), f"{row['like_count']}"), style=style, title="Anzahl Likes"),
# quote_count: . Indicates approximately how many times this Tweet has been quoted by Twitter users. Example: ui.column(3, ui.HTML(quote_count_svg), ui.p(
# TODO: use a nice svg for quote_count f"{row['quote_count']}"), style=style, title="Anzahl Quotes"),
ui.column(3, ui.p(f"Quote Count: {row['quote_count']}"), style=style), ), style="border: 2px solid #119; margin-bottom: 1.5em; border-radius: 10px;"))
), style="border: 1px solid #954; margin-bottom: 1em;"))
return tweet_ui return tweet_ui