diff --git a/main.py b/main.py index 850ba3c..5305653 100755 --- a/main.py +++ b/main.py @@ -1,4 +1,4 @@ -#! python3 +#! python import locale import logging import os @@ -74,15 +74,12 @@ def download(id: int): author = None try: - category = pC.find("span", {"class": "categoryInfo"}).find_all() - category = [c.text for c in category] - category = ";".join(category) + category = pC.find("span", {"class": "categoryInfo"}).find().text except AttributeError: category = None try: tags = [x.text for x in pC.find("div", {"class": "tagsInfo"}).find_all("a")] - tags = ";".join(tags) except AttributeError: tags = None @@ -130,6 +127,7 @@ def run_downloads(min_id: int, max_id: int, num_threads: int = 8): # sqlite can't handle lists so let's convert them to a single row csv # TODO: make sure our database is properly normalized df = pd.DataFrame(res) + df.tags = df.tags.apply(lambda x: "; ".join(x) if x is not None else None) return df