From a97067d45288428a94345940fb1b1af33d9f91a9 Mon Sep 17 00:00:00 2001 From: justine Date: Tue, 9 Feb 2021 20:19:48 +0000 Subject: [PATCH] Update '4chanthreadfinder.py' Bugfixes --- 4chanthreadfinder.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/4chanthreadfinder.py b/4chanthreadfinder.py index f7fd47a..d3b49de 100755 --- a/4chanthreadfinder.py +++ b/4chanthreadfinder.py @@ -83,7 +83,6 @@ def html_get(url): browser = mechanicalsoup.Browser() try: page = browser.get(url) - print(f"Got html from {url}") return page.soup except Exception as e: print(f"Got error {e}") @@ -122,12 +121,6 @@ def item_dl(sources, dlfolder): global folder_content - try: - #Making folder - mkdir(dlfolder) - except FileExistsError: - print(f"{dlfolder} already exists, not creating") - #Deduplicating imagenames = [] @@ -136,10 +129,25 @@ def item_dl(sources, dlfolder): imagename = findall(r"[^\/]*$", source)[0] if imagename[:-4] not in folder_content: name = wget.download(fullsource, out=dlfolder) - print(f"{name} downloaded") + print(f"{name} downloaded from {source}") return True +def folder_create(dlfolder): + ''' + Create the folder if it does not exist + Args: + - dlfolder : path of folder to create + ''' + + try: + #Making folder + mkdir(dlfolder) + except FileExistsError: + print(f"{dlfolder} already exists, not creating") + + + def folder_watch(folder): ''' Watch for the content of a folder and return its content. @@ -179,7 +187,6 @@ def dl_threads(folder, url): #oneshot for href in hrefs: - print(f"going after {url}{href}") subsoup = html_get(f"{url}{href}") subsources = scraper(subsoup) folder_watch(folder) @@ -198,7 +205,9 @@ url = args.url soup = html_get(url) hrefs = thread_finder(soup, keyword) sources = scraper(soup) +folder_create(folder) folder_content = "" + #item_dl(sources, folder) #Dling all threads found @@ -207,10 +216,8 @@ folder_content = "" if not args.constant: for href in hrefs: folder_watch(folder) - dl_threads(folder_url) + dl_threads(folder, url) else: while True: folder_watch(folder) dl_threads(folder, url) - -