Update '4chanthreadfinder.py'

Bugfixes
This commit is contained in:
justine 2021-02-09 20:19:48 +00:00
parent aa67b222d8
commit a97067d452

View File

@ -83,7 +83,6 @@ def html_get(url):
browser = mechanicalsoup.Browser()
try:
page = browser.get(url)
print(f"Got html from {url}")
return page.soup
except Exception as e:
print(f"Got error {e}")
@ -122,12 +121,6 @@ def item_dl(sources, dlfolder):
global folder_content
try:
#Making folder
mkdir(dlfolder)
except FileExistsError:
print(f"{dlfolder} already exists, not creating")
#Deduplicating
imagenames = []
@ -136,10 +129,25 @@ def item_dl(sources, dlfolder):
imagename = findall(r"[^\/]*$", source)[0]
if imagename[:-4] not in folder_content:
name = wget.download(fullsource, out=dlfolder)
print(f"{name} downloaded")
print(f"{name} downloaded from {source}")
return True
def folder_create(dlfolder):
    '''
    Create the download folder if it does not already exist.

    Missing parent directories are created as well, so a nested path
    (for example "downloads/board/thread") works on the first run.
    When the folder is already present a notice is printed and nothing
    else happens.

    Args:
        - dlfolder : path of folder to create
    '''
    # Imported locally so this function does not rely on which names the
    # file-level "os" import happens to expose.
    from os import makedirs

    try:
        # Unlike mkdir, makedirs also creates intermediate directories;
        # it still raises FileExistsError when the target already exists.
        makedirs(dlfolder)
    except FileExistsError:
        print(f"{dlfolder} already exists, not creating")
def folder_watch(folder):
'''
Watch for the content of a folder and return its content.
@ -179,7 +187,6 @@ def dl_threads(folder, url):
#oneshot
for href in hrefs:
print(f"going after {url}{href}")
subsoup = html_get(f"{url}{href}")
subsources = scraper(subsoup)
folder_watch(folder)
@ -198,7 +205,9 @@ url = args.url
soup = html_get(url)
hrefs = thread_finder(soup, keyword)
sources = scraper(soup)
folder_create(folder)
folder_content = ""
#item_dl(sources, folder)
#Dling all threads found
@ -207,10 +216,8 @@ folder_content = ""
if not args.constant:
for href in hrefs:
folder_watch(folder)
dl_threads(folder_url)
dl_threads(folder, url)
else:
while True:
folder_watch(folder)
dl_threads(folder, url)