Update '4chanthreadfinder.py'
Bugfixes
parent aa67b222d8
commit a97067d452
@@ -83,7 +83,6 @@ def html_get(url):
     browser = mechanicalsoup.Browser()
     try:
         page = browser.get(url)
-        print(f"Got html from {url}")
         return page.soup
     except Exception as e:
         print(f"Got error {e}")
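For context, this is how html_get reads once the redundant progress print is gone, a minimal sketch assuming only the mechanicalsoup dependency the script already uses (note the function implicitly returns None when the request fails):

import mechanicalsoup

def html_get(url):
    '''Fetch a page and return its BeautifulSoup tree, or None on error.'''
    browser = mechanicalsoup.Browser()
    try:
        page = browser.get(url)    # response object carrying a .soup attribute
        return page.soup
    except Exception as e:
        print(f"Got error {e}")   # falls through, returning None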
@@ -122,12 +121,6 @@ def item_dl(sources, dlfolder):
 
     global folder_content
 
-    try:
-        #Making folder
-        mkdir(dlfolder)
-    except FileExistsError:
-        print(f"{dlfolder} already exists, not creating")
-
     #Deduplicating
     imagenames = []
 
@@ -136,10 +129,25 @@ def item_dl(sources, dlfolder):
         imagename = findall(r"[^\/]*$", source)[0]
         if imagename[:-4] not in folder_content:
             name = wget.download(fullsource, out=dlfolder)
-            print(f"{name} downloaded")
+            print(f"{name} downloaded from {source}")
 
     return True
 
+def folder_create(dlfolder):
+    '''
+    Create the folder if it does not exist
+    Args:
+        - dlfolder : path of folder to create
+    '''
+
+    try:
+        #Making folder
+        mkdir(dlfolder)
+    except FileExistsError:
+        print(f"{dlfolder} already exists, not creating")
+
+
+
 def folder_watch(folder):
     '''
     Watch for the content of a folder and return its content.
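The directory-creation logic removed from item_dl above now lives in the new folder_create helper, so it can run once at startup instead of on every batch of downloads. A standalone sketch of its behavior, assuming the script's existing "from os import mkdir" import (the "wallpapers" path is illustrative):

from os import mkdir

def folder_create(dlfolder):
    '''Create the folder if it does not exist'''
    try:
        mkdir(dlfolder)    # raises FileExistsError if the folder is already there
    except FileExistsError:
        print(f"{dlfolder} already exists, not creating")

folder_create("wallpapers")    # first call: directory created silently
folder_create("wallpapers")    # second call: prints the already-exists notice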
@@ -179,7 +187,6 @@ def dl_threads(folder, url):
 
     #oneshot
     for href in hrefs:
-        print(f"going after {url}{href}")
         subsoup = html_get(f"{url}{href}")
         subsources = scraper(subsoup)
         folder_watch(folder)
@@ -198,7 +205,9 @@ url = args.url
 soup = html_get(url)
 hrefs = thread_finder(soup, keyword)
 sources = scraper(soup)
+folder_create(folder)
 folder_content = ""
+
 #item_dl(sources, folder)
 
 #Dling all threads found
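With folder_create(folder) wired into startup, the target directory exists before folder_watch or any download runs. The main flow after this commit, in sketch form (argument parsing omitted, names as in the script):

soup = html_get(url)                     # fetch the board index page
hrefs = thread_finder(soup, keyword)     # threads matching the keyword
sources = scraper(soup)                  # image sources on the index page
folder_create(folder)                    # create the download dir exactly once
folder_content = ""                      # seeded before folder_watch() first runs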
@@ -207,10 +216,8 @@ folder_content = ""
 if not args.constant:
     for href in hrefs:
         folder_watch(folder)
-        dl_threads(folder_url)
+        dl_threads(folder, url)
 else:
     while True:
         folder_watch(folder)
         dl_threads(folder, url)
-
-
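The one-shot branch is the actual bugfix named in the commit message: dl_threads(folder_url) referenced a variable that was never defined, so non-constant mode crashed with a NameError on its first iteration. A minimal reproduction, with the board URL as an illustrative example:

def dl_threads(folder, url):    # two positional parameters, as in this script
    ...

folder, url = "wallpapers", "https://boards.4chan.org/wg/"

dl_threads(folder, url)         # fixed call: both arguments passed
# dl_threads(folder_url)        # NameError: name 'folder_url' is not defined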