Update '4chanthreadfinder.py'
Bugfixes
This commit is contained in:
parent
aa67b222d8
commit
a97067d452
@ -83,7 +83,6 @@ def html_get(url):
|
||||
browser = mechanicalsoup.Browser()
|
||||
try:
|
||||
page = browser.get(url)
|
||||
print(f"Got html from {url}")
|
||||
return page.soup
|
||||
except Exception as e:
|
||||
print(f"Got error {e}")
|
||||
@ -122,12 +121,6 @@ def item_dl(sources, dlfolder):
|
||||
|
||||
global folder_content
|
||||
|
||||
try:
|
||||
#Making folder
|
||||
mkdir(dlfolder)
|
||||
except FileExistsError:
|
||||
print(f"{dlfolder} already exists, not creating")
|
||||
|
||||
#Deduplicating
|
||||
imagenames = []
|
||||
|
||||
@ -136,10 +129,25 @@ def item_dl(sources, dlfolder):
|
||||
imagename = findall(r"[^\/]*$", source)[0]
|
||||
if imagename[:-4] not in folder_content:
|
||||
name = wget.download(fullsource, out=dlfolder)
|
||||
print(f"{name} downloaded")
|
||||
print(f"{name} downloaded from {source}")
|
||||
|
||||
return True
|
||||
|
||||
def folder_create(dlfolder):
    '''
    Create the download folder if it does not exist.
    Missing parent folders are created as well, so a nested path
    such as "downloads/board/thread" works on the first run.
    Args:
        - dlfolder : path of folder to create
    Raises:
        - OSError subclasses other than FileExistsError (for instance
          PermissionError) are left to propagate to the caller.
    '''
    # Imported locally so this function does not depend on which names
    # the top of the file pulls in from os.
    import os

    try:
        # makedirs (unlike mkdir) also creates intermediate folders;
        # it still raises FileExistsError when the leaf already exists.
        os.makedirs(dlfolder)
    except FileExistsError:
        print(f"{dlfolder} already exists, not creating")
|
||||
|
||||
|
||||
|
||||
def folder_watch(folder):
|
||||
'''
|
||||
Watch for the content of a folder and return its content.
|
||||
@ -179,7 +187,6 @@ def dl_threads(folder, url):
|
||||
|
||||
#oneshot
|
||||
for href in hrefs:
|
||||
print(f"going after {url}{href}")
|
||||
subsoup = html_get(f"{url}{href}")
|
||||
subsources = scraper(subsoup)
|
||||
folder_watch(folder)
|
||||
@ -198,7 +205,9 @@ url = args.url
|
||||
soup = html_get(url)
|
||||
hrefs = thread_finder(soup, keyword)
|
||||
sources = scraper(soup)
|
||||
folder_create(folder)
|
||||
folder_content = ""
|
||||
|
||||
#item_dl(sources, folder)
|
||||
|
||||
#Dling all threads found
|
||||
@ -207,10 +216,8 @@ folder_content = ""
|
||||
if not args.constant:
|
||||
for href in hrefs:
|
||||
folder_watch(folder)
|
||||
dl_threads(folder_url)
|
||||
dl_threads(folder, url)
|
||||
else:
|
||||
while True:
|
||||
folder_watch(folder)
|
||||
dl_threads(folder, url)
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user