Plus de threads, trop complexe

2021-02-08 17:38:31 +01:00 · 2021-02-08 17:38:31 +01:00 · 913208274b
commit 913208274b
parent 85a798b311
13 changed files with 50 additions and 36 deletions
--- a/1612800279774.jpgg1ir4h9g.tmp
+++ b/1612800279774.jpgg1ir4h9g.tmp
--- a/1612800847218.jpgz00i3nd3.tmp
+++ b/1612800847218.jpgz00i3nd3.tmp
--- a/1612801102502.jpg7bewagr3.tmp
+++ b/1612801102502.jpg7bewagr3.tmp
--- a/1612801206104.jpgn8w76bsu.tmp
+++ b/1612801206104.jpgn8w76bsu.tmp
--- a/1612801259880.jpg66nlexsl.tmp
+++ b/1612801259880.jpg66nlexsl.tmp
--- a/1612801259880.jpg6__c8xd4.tmp
+++ b/1612801259880.jpg6__c8xd4.tmp
--- a/1612801442460.jpguqt7rcl2.tmp
+++ b/1612801442460.jpguqt7rcl2.tmp
--- a/1612801753931.jpgebpmo83j.tmp
+++ b/1612801753931.jpgebpmo83j.tmp
--- a/1612801753931.jpgr25ymlkk.tmp
+++ b/1612801753931.jpgr25ymlkk.tmp
--- a/1612801818460.jpgyn5eonf9.tmp
+++ b/1612801818460.jpgyn5eonf9.tmp
--- a/1612801951857.jpgm0h3hv2r.tmp
+++ b/1612801951857.jpgm0h3hv2r.tmp
--- a/4chanthreadfinder.py
+++ b/4chanthreadfinder.py
@ -40,7 +40,6 @@ def getArgs():
    parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
    parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'", action="store", type=str, required=True)
    parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
-    parser.add_argument("-t", "--threads", help = "Number of threads in case of constant run, defaults to 2", action="store", type=int, required=False)

    #Creating the args object
    args=parser.parse_args()
@ -116,51 +115,75 @@ def item_dl(sources, dlfolder):
    '''
    Download all items in the sources list to folder dlfolder, which we try to create
    Args:
-    - sources : a list of URLs
+    - sources : a list of URLs
+    - global folder_content : see folder_watch()
+
    '''
-    #Making folder
+    
+    global folder_content
+    
    try:
+    #Making folder
        mkdir(dlfolder)
    except FileExistsError:
        print(f"{dlfolder} already exists, not creating")

    #Deduplicating
    imagenames = []
-    dir_content = listdir(dlfolder)

    for source in sources:
        fullsource = "http://" + source
        imagename = findall(r"[^\/]*$", source)[0]
-        if imagename[:-4] not in str(dir_content):
+        if imagename[:-4] not in folder_content:
            name = wget.download(fullsource, out=dlfolder)
            print(f"{name} downloaded")

    return True

-def constant_dl(folder, url):
+def folder_watch(folder):
+    '''
+    Read the content of a folder and store it in the global folder_content.
+    Content is a string containing all the names of all the elements.
+    Args:
+    - folder : folder to watch
+    - global folder_content : module-level string, overwritten by this function
+    Returns:
+    Nothing; the global folder_content is set to the concatenated names of all the entries in the folder
+    '''
+
+    global folder_content
+
+    folder_list = listdir(folder)
+    folder_content = "" 
+
+    for i in folder_list:
+        folder_content += i
+
+
+def dl_threads(folder, url):
    '''
    Constantly download...
    Args: 
    - folder: folder to dl into
    - url : board to watch
    '''
+
    try:
-        while True:
-            sleep(2)
-            soup = html_get(url)
-            hrefs = thread_finder(soup, keyword)
-            sources = scraper(soup)
-            #item_dl(sources, folder)
+        sleep(2)
+        soup = html_get(url)
+        hrefs = thread_finder(soup, keyword)
+        sources = scraper(soup)
+        #item_dl(sources, folder)

-            #Dling all threads found
+        #Dling all threads found

-            #oneshot
-            for href in hrefs:
-                print(f"going after {url}{href}")
-                subsoup = html_get(f"{url}{href}")
-                subsources = scraper(subsoup)
-                print(subsources)
-                item_dl(subsources, folder)
+        #oneshot
+        for href in hrefs:
+            print(f"going after {url}{href}")
+            subsoup = html_get(f"{url}{href}")
+            subsources = scraper(subsoup)
+            folder_watch(folder)
+            item_dl(subsources, folder)
    except Exception as e:
        print(f"Houston, we had a problem: \n{e}")

@ -170,15 +193,12 @@ def constant_dl(folder, url):
 args = getArgs()
 folder = args.folder
 keyword = args.keyword
-if args.threads:
-    threadnumber = args.threads
-else:
-    threadnumber = 2

 url = args.url
 soup = html_get(url)
 hrefs = thread_finder(soup, keyword)
 sources = scraper(soup)
+folder_content = ""
 #item_dl(sources, folder)

 #Dling all threads found
@ -186,16 +206,11 @@ sources = scraper(soup)
 #oneshot
 if not args.constant:
    for href in hrefs:
-        print(f"going after {url}{href}")
-        subsoup = html_get(f"{url}{href}")
-        subsources = scraper(subsoup)
-        print(subsources)
-        item_dl(subsources, folder)
+        folder_watch(folder)
+        dl_threads(folder_url)
 else:
-    thread_objects = []
-    for i in range (1, threadnumber):
-        thread_objects.append(Thread(target=constant_dl, args=(folder, url)))
-    for thread in thread_objects:
-        thread.start()
-
+    while True:
+            folder_watch(folder)
+            dl_threads(folder, url)
+            

--- a/README.md
+++ b/README.md
@ -50,7 +50,6 @@ Use (constant, multi-threaded):

 ##Todo
 * Filter by filetype
-* Multi-threaded not really working, -t 2 gives one thread and many threads will cause duplicates
 * Use a try / catch when dling since some threads go 404 and it gives us a crash
 * Make a pretty website with some keywords running in the bg, making for some nice public folders (wallpapers...)