Update '4chanthreadfinder.py'

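Bugfixes: call dl_threads(folder, url) with the correct arguments, create the download folder once at startup via the new folder_create() instead of inside item_dl(), report the source of each downloaded file, and drop the noisy progress prints.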
2021-02-09 20:19:48 +00:00 · 2021-02-09 20:19:48 +00:00 · a97067d452
commit a97067d452
parent aa67b222d8
1 changed files with 19 additions and 12 deletions
--- a/4chanthreadfinder.py
+++ b/4chanthreadfinder.py
@@ -83,7 +83,6 @@ def html_get(url):
    browser = mechanicalsoup.Browser()
    try:
        page = browser.get(url)
-        print(f"Got html from {url}")
        return page.soup
    except Exception as e:
        print(f"Got error {e}")
@@ -122,12 +121,6 @@ def item_dl(sources, dlfolder):
    
    global folder_content
    
-    try:
-    #Making folder
-        mkdir(dlfolder)
-    except FileExistsError:
-        print(f"{dlfolder} already exists, not creating")
-
    #Deduplicating
    imagenames = []

@@ -136,10 +129,25 @@ def item_dl(sources, dlfolder):
        imagename = findall(r"[^\/]*$", source)[0]
        if imagename[:-4] not in folder_content:
            name = wget.download(fullsource, out=dlfolder)
-            print(f"{name} downloaded")
+            print(f"{name} downloaded from {source}")

    return True

+def folder_create(dlfolder):
+    '''
+    Create the folder; if it already exists, print a notice and carry on.
+    Args:
+    - dlfolder : path of folder to create (passed straight to mkdir)
+    '''
+
+    try:
+        #Making folder; only the "already exists" case is handled below
+        mkdir(dlfolder)
+    except FileExistsError:
+        print(f"{dlfolder} already exists, not creating")
+
+
+
 def folder_watch(folder):
    '''
    Watch for the content of a folder and return its content.
@@ -179,7 +187,6 @@ def dl_threads(folder, url):

        #oneshot
        for href in hrefs:
-            print(f"going after {url}{href}")
            subsoup = html_get(f"{url}{href}")
            subsources = scraper(subsoup)
            folder_watch(folder)
@@ -198,7 +205,9 @@ url = args.url
 soup = html_get(url)
 hrefs = thread_finder(soup, keyword)
 sources = scraper(soup)
+folder_create(folder)
 folder_content = ""
+
 #item_dl(sources, folder)

 #Dling all threads found
@@ -207,10 +216,8 @@ folder_content = ""
 if not args.constant:
    for href in hrefs:
        folder_watch(folder)
-        dl_threads(folder_url)
+        dl_threads(folder, url)
 else:
    while True:
            folder_watch(folder)
            dl_threads(folder, url)
-            
-