No more threads, too complex
BIN  1612800279774.jpgg1ir4h9g.tmp (new file, 248 KiB)
BIN  1612800847218.jpgz00i3nd3.tmp (new file, 272 KiB)
BIN  1612801102502.jpg7bewagr3.tmp (new file, 408 KiB)
BIN  1612801206104.jpgn8w76bsu.tmp (new file, 344 KiB)
BIN  1612801259880.jpg66nlexsl.tmp (new file, 376 KiB)
BIN  1612801259880.jpg6__c8xd4.tmp (new file, 408 KiB)
BIN  1612801442460.jpguqt7rcl2.tmp (new file, 920 KiB)
BIN  1612801753931.jpgebpmo83j.tmp (new file, 24 KiB)
BIN  1612801753931.jpgr25ymlkk.tmp (new file, 24 KiB)
BIN  1612801818460.jpgyn5eonf9.tmp (new file, 24 KiB)
BIN  1612801951857.jpgm0h3hv2r.tmp (new file, 56 KiB)
@@ -40,7 +40,6 @@ def getArgs():
 parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
 parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'", action="store", type=str, required=True)
 parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
-parser.add_argument("-t", "--threads", help = "Number of threads in case of constant run, defaults to 2", action="store", type=int, required=False)
 
 #Creating the args object
 args=parser.parse_args()
@@ -116,51 +115,75 @@ def item_dl(sources, dlfolder):
 '''
 Download all items in the sources list to folder dlfolder, which we try to create"
 Args:
-- sources : a list of URLs
+- sources : a list of URLsi
+- global folder_content : see folder_watch()
 
 '''
-#Making folder
+global folder_content
 
 try:
+#Making folder
 mkdir(dlfolder)
 except FileExistsError:
 print(f"{dlfolder} already exists, not creating")
 
 #Deduplicating
 imagenames = []
-dir_content = listdir(dlfolder)
 
 for source in sources:
 fullsource = "http://" + source
 imagename = findall(r"[^\/]*$", source)[0]
-if imagename[:-4] not in str(dir_content):
+if imagename[:-4] not in folder_content:
 name = wget.download(fullsource, out=dlfolder)
 print(f"{name} downloaded")
 
 return True
 
-def constant_dl(folder, url):
+def folder_watch(folder):
+'''
+Watch for the content of a folder and return its content.
+Content is a string containing all the names of all the elements.
+Args:
+- folder : folder to watch
+- global folder_content : see folder_watch()
+Returns:
+folder_content : said string, containing all the names of all the files in the folder
+'''
+
+global folder_content
+
+folder_list = listdir(folder)
+folder_content = ""
+
+for i in folder_list:
+folder_content += i
+
+
+def dl_threads(folder, url):
 '''
 Constantly download...
 Args:
 - folder: folder to dl into
 - url : board to watch
 '''
 
 try:
-while True:
-sleep(2)
-soup = html_get(url)
-hrefs = thread_finder(soup, keyword)
-sources = scraper(soup)
-#item_dl(sources, folder)
+sleep(2)
+soup = html_get(url)
+hrefs = thread_finder(soup, keyword)
+sources = scraper(soup)
+#item_dl(sources, folder)
 
 #Dling all threads found
 
 #oneshot
 for href in hrefs:
 print(f"going after {url}{href}")
 subsoup = html_get(f"{url}{href}")
 subsources = scraper(subsoup)
-print(subsources)
+folder_watch(folder)
 item_dl(subsources, folder)
 except Exception as e:
 print(f"Houston, we had a problem: \n{e}")
 
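For context on the deduplication flow this hunk introduces: folder_watch() concatenates every filename in the download folder into the global folder_content string, and item_dl() then skips any source whose basename (minus its extension) already appears in that string. Below is a rough standalone sketch of the same idea, reusing names from the diff; the already_downloaded() helper is hypothetical and not part of the commit.

```python
from os import listdir
from re import findall

folder_content = ""

def folder_watch(folder):
    # Concatenate every filename in `folder` into one global string,
    # as the new folder_watch() in the diff does.
    global folder_content
    folder_content = ""
    for name in listdir(folder):
        folder_content += name

def already_downloaded(source):
    # Hypothetical helper mirroring item_dl(): take the URL's basename,
    # strip the 4-character extension, and check it against folder_content.
    imagename = findall(r"[^\/]*$", source)[0]
    return imagename[:-4] in folder_content
```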
@@ -170,15 +193,12 @@ def constant_dl(folder, url):
 args = getArgs()
 folder = args.folder
 keyword = args.keyword
-if args.threads:
-threadnumber = args.threads
-else:
-threadnumber = 2
 
 url = args.url
 soup = html_get(url)
 hrefs = thread_finder(soup, keyword)
 sources = scraper(soup)
+folder_content = ""
 #item_dl(sources, folder)
 
 #Dling all threads found
@@ -186,16 +206,11 @@ sources = scraper(soup)
 #oneshot
 if not args.constant:
 for href in hrefs:
-print(f"going after {url}{href}")
-subsoup = html_get(f"{url}{href}")
-subsources = scraper(subsoup)
-print(subsources)
-item_dl(subsources, folder)
+folder_watch(folder)
+dl_threads(folder_url)
 else:
-thread_objects = []
-for i in range (1, threadnumber):
-thread_objects.append(Thread(target=constant_dl, args=(folder, url)))
-for thread in thread_objects:
-thread.start()
+while True:
+folder_watch(folder)
+dl_threads(folder, url)
 
 
@@ -50,7 +50,6 @@ Use (constant, multi-threaded):
 
 ##Todo
 * Filter by filetype
-* Multi-threaded not really working, -t 2 gives one thread and many threads will cause duplicates
 * Use a try / catch when dling since some threads go 404 and it gives us a crash
 * Make a pretty website with some keywords running in the bg, making for some nice public folders (wallpapers...)
 
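The remaining Todo item about a try / except around downloads (threads that go 404 currently crash the run) could be handled roughly as below. This is only an illustrative sketch, not part of the commit; the safe_download() helper is hypothetical, while wget.download(), fullsource and dlfolder follow the existing item_dl() code.

```python
import wget
from urllib.error import HTTPError, URLError

def safe_download(fullsource, dlfolder):
    # Hypothetical wrapper: skip a source whose thread went 404 between
    # scraping and downloading instead of letting the whole run crash.
    try:
        name = wget.download(fullsource, out=dlfolder)
        print(f"{name} downloaded")
        return name
    except (HTTPError, URLError) as e:
        print(f"skipping {fullsource}: {e}")
        return None
```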