Compare commits
No commits in common. "7cc3d1b7e46778df43ca7193cd2b348286b281e0" and "07f8b1ef311330fa4f708424e10bb3e799ec7886" have entirely different histories.
7cc3d1b7e4
...
07f8b1ef31
1 changed file with 3 additions and 5 deletions
8
main.py
8
main.py
|
@@ -1,4 +1,4 @@
|
||||||
#! python3
|
#! python
|
||||||
import locale
|
import locale
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
@@ -74,15 +74,12 @@ def download(id: int):
|
||||||
author = None
|
author = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
category = pC.find("span", {"class": "categoryInfo"}).find_all()
|
category = pC.find("span", {"class": "categoryInfo"}).find().text
|
||||||
category = [c.text for c in category]
|
|
||||||
category = ";".join(category)
|
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
category = None
|
category = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
tags = [x.text for x in pC.find("div", {"class": "tagsInfo"}).find_all("a")]
|
tags = [x.text for x in pC.find("div", {"class": "tagsInfo"}).find_all("a")]
|
||||||
tags = ";".join(tags)
|
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
tags = None
|
tags = None
|
||||||
|
|
||||||
|
@@ -130,6 +127,7 @@ def run_downloads(min_id: int, max_id: int, num_threads: int = 8):
|
||||||
# sqlite can't handle lists so let's convert them to a single row csv
|
# sqlite can't handle lists so let's convert them to a single row csv
|
||||||
# TODO: make sure our database is properly normalized
|
# TODO: make sure our database is properly normalized
|
||||||
df = pd.DataFrame(res)
|
df = pd.DataFrame(res)
|
||||||
|
df.tags = df.tags.apply(lambda x: "; ".join(x) if x is not None else None)
|
||||||
|
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue