Compare commits
	
		
			2 commits
		
	
	
		
			07f8b1ef31
			...
			7cc3d1b7e4
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 7cc3d1b7e4 | |||
| 116cce79a4 | |||

1 changed file with 5 additions and 3 deletions
				
			
		
							
								
								
									
										8
									
								
								main.py
									
										
									
									
									
								
							
							
						
						
									
										8
									
								
								main.py
									
										
									
									
									
								
							| 
						 | 
					@ -1,4 +1,4 @@
 | 
				
			||||||
#! python
 | 
					#! python3
 | 
				
			||||||
import locale
 | 
					import locale
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
| 
						 | 
					@ -74,12 +74,15 @@ def download(id: int):
 | 
				
			||||||
        author = None
 | 
					        author = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        category = pC.find("span", {"class": "categoryInfo"}).find().text
 | 
					        category = pC.find("span", {"class": "categoryInfo"}).find_all()
 | 
				
			||||||
 | 
					        category = [c.text for c in category]
 | 
				
			||||||
 | 
					        category = ";".join(category)
 | 
				
			||||||
    except AttributeError:
 | 
					    except AttributeError:
 | 
				
			||||||
        category = None
 | 
					        category = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        tags = [x.text for x in pC.find("div", {"class": "tagsInfo"}).find_all("a")]
 | 
					        tags = [x.text for x in pC.find("div", {"class": "tagsInfo"}).find_all("a")]
 | 
				
			||||||
 | 
					        tags = ";".join(tags)
 | 
				
			||||||
    except AttributeError:
 | 
					    except AttributeError:
 | 
				
			||||||
        tags = None
 | 
					        tags = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -127,7 +130,6 @@ def run_downloads(min_id: int, max_id: int, num_threads: int = 8):
 | 
				
			||||||
    # sqlite can't handle lists so let's convert them to a single row csv
 | 
					    # sqlite can't handle lists so let's convert them to a single row csv
 | 
				
			||||||
    # TODO: make sure our database is properly normalized
 | 
					    # TODO: make sure our database is properly normalized
 | 
				
			||||||
    df = pd.DataFrame(res)
 | 
					    df = pd.DataFrame(res)
 | 
				
			||||||
    df.tags = df.tags.apply(lambda x: "; ".join(x) if x is not None else None)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return df
 | 
					    return df
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue