diff --git a/1612800279774.jpgg1ir4h9g.tmp b/1612800279774.jpgg1ir4h9g.tmp
new file mode 100644
index 0000000..72ba3c7
Binary files /dev/null and b/1612800279774.jpgg1ir4h9g.tmp differ
diff --git a/1612800847218.jpgz00i3nd3.tmp b/1612800847218.jpgz00i3nd3.tmp
new file mode 100644
index 0000000..c8e318a
Binary files /dev/null and b/1612800847218.jpgz00i3nd3.tmp differ
diff --git a/1612801102502.jpg7bewagr3.tmp b/1612801102502.jpg7bewagr3.tmp
new file mode 100644
index 0000000..b37932e
Binary files /dev/null and b/1612801102502.jpg7bewagr3.tmp differ
diff --git a/1612801206104.jpgn8w76bsu.tmp b/1612801206104.jpgn8w76bsu.tmp
new file mode 100644
index 0000000..37b3988
Binary files /dev/null and b/1612801206104.jpgn8w76bsu.tmp differ
diff --git a/1612801259880.jpg66nlexsl.tmp b/1612801259880.jpg66nlexsl.tmp
new file mode 100644
index 0000000..76d6d0f
Binary files /dev/null and b/1612801259880.jpg66nlexsl.tmp differ
diff --git a/1612801259880.jpg6__c8xd4.tmp b/1612801259880.jpg6__c8xd4.tmp
new file mode 100644
index 0000000..5f9c452
Binary files /dev/null and b/1612801259880.jpg6__c8xd4.tmp differ
diff --git a/1612801442460.jpguqt7rcl2.tmp b/1612801442460.jpguqt7rcl2.tmp
new file mode 100644
index 0000000..28d0a24
Binary files /dev/null and b/1612801442460.jpguqt7rcl2.tmp differ
diff --git a/1612801753931.jpgebpmo83j.tmp b/1612801753931.jpgebpmo83j.tmp
new file mode 100644
index 0000000..bc73fc7
Binary files /dev/null and b/1612801753931.jpgebpmo83j.tmp differ
diff --git a/1612801753931.jpgr25ymlkk.tmp b/1612801753931.jpgr25ymlkk.tmp
new file mode 100644
index 0000000..bc73fc7
Binary files /dev/null and b/1612801753931.jpgr25ymlkk.tmp differ
diff --git a/1612801818460.jpgyn5eonf9.tmp b/1612801818460.jpgyn5eonf9.tmp
new file mode 100644
index 0000000..15c4359
Binary files /dev/null and b/1612801818460.jpgyn5eonf9.tmp differ
diff --git a/1612801951857.jpgm0h3hv2r.tmp b/1612801951857.jpgm0h3hv2r.tmp
new file mode 100644
index 0000000..ae896f9
Binary files /dev/null and b/1612801951857.jpgm0h3hv2r.tmp differ
diff --git a/4chanthreadfinder.py b/4chanthreadfinder.py
index 7c76f51..f7fd47a 100755
--- a/4chanthreadfinder.py
+++ b/4chanthreadfinder.py
@@ -40,7 +40,6 @@ def getArgs():
     parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
     parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'", action="store", type=str, required=True)
     parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
-    parser.add_argument("-t", "--threads", help = "Number of threads in case of constant run, defaults to 2", action="store", type=int, required=False)
 
     #Creating the args object
     args=parser.parse_args()
@@ -116,51 +115,75 @@ def item_dl(sources, dlfolder):
     '''
     Download all items in the sources list to folder dlfolder, which we try to create
     Args:
         - sources : a list of URLs
+        - global folder_content : see folder_watch()
     '''
-    #Making folder
+
+    global folder_content
+
     try:
+        #Making folder
         mkdir(dlfolder)
     except FileExistsError:
         print(f"{dlfolder} already exists, not creating")
 
     #Deduplicating
     imagenames = []
-    dir_content = listdir(dlfolder)
     for source in sources:
         fullsource = "http://" + source
         imagename = findall(r"[^\/]*$", source)[0]
-        if imagename[:-4] not in str(dir_content):
+        if imagename[:-4] not in folder_content:
             name = wget.download(fullsource, out=dlfolder)
             print(f"{name} downloaded")
 
     return True
 
-def constant_dl(folder, url):
+def folder_watch(folder):
+    '''
+    Watch the content of a folder and record it in the global folder_content,
+    a single string containing the names of all the files in the folder.
+    Args:
+        - folder : folder to watch
+    Sets:
+        - global folder_content : said string, rebuilt on every call
+    '''
+
+    global folder_content
+
+    folder_list = listdir(folder)
+    folder_content = ""
+
+    for i in folder_list:
+        folder_content += i
+
+
+def dl_threads(folder, url):
     '''
-    Constantly download...
+    Download all matching threads once; in constant mode the caller loops.
     Args:
         - folder: folder to dl into
         - url : board to watch
     '''
+
     try:
-        while True:
-            sleep(2)
-            soup = html_get(url)
-            hrefs = thread_finder(soup, keyword)
-            sources = scraper(soup)
-            #item_dl(sources, folder)
+        sleep(2)
+        soup = html_get(url)
+        hrefs = thread_finder(soup, keyword)
+        sources = scraper(soup)
+        #item_dl(sources, folder)
 
-            #Dling all threads found
+        #Dling all threads found
 
-            #oneshot
-            for href in hrefs:
-                print(f"going after {url}{href}")
-                subsoup = html_get(f"{url}{href}")
-                subsources = scraper(subsoup)
-                print(subsources)
-                item_dl(subsources, folder)
+        #oneshot
+        for href in hrefs:
+            print(f"going after {url}{href}")
+            subsoup = html_get(f"{url}{href}")
+            subsources = scraper(subsoup)
+            folder_watch(folder)
+            item_dl(subsources, folder)
 
     except Exception as e:
         print(f"Houston, we had a problem: \n{e}")
 
@@ -170,15 +193,12 @@ def constant_dl(folder, url):
 args = getArgs()
 folder = args.folder
 keyword = args.keyword
-if args.threads:
-    threadnumber = args.threads
-else:
-    threadnumber = 2
 url = args.url
 
 soup = html_get(url)
 hrefs = thread_finder(soup, keyword)
 sources = scraper(soup)
+folder_content = ""
 #item_dl(sources, folder)
 
 #Dling all threads found
 
@@ -186,16 +206,11 @@ sources = scraper(soup)
 #oneshot
 if not args.constant:
-    for href in hrefs:
-        print(f"going after {url}{href}")
-        subsoup = html_get(f"{url}{href}")
-        subsources = scraper(subsoup)
-        print(subsources)
-        item_dl(subsources, folder)
+    folder_watch(folder)
+    dl_threads(folder, url)
 else:
-    thread_objects = []
-    for i in range (1, threadnumber):
-        thread_objects.append(Thread(target=constant_dl, args=(folder, url)))
-    for thread in thread_objects:
-        thread.start()
-
+    while True:
+        folder_watch(folder)
+        dl_threads(folder, url)
diff --git a/README.md b/README.md
index 2ab2ceb..95080c6 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,6 @@ Use (constant, multi-threaded):
 ##Todo
 
 * Filter by filetype
-* Multi-threaded not really working, -t 2 gives one thread and many threads will cause duplicates
 * Use a try / catch when dling since some threads go 404 and it gives us a crash
 * Make a pretty website with some keywords running in the bg, making for some nice public folders (wallpapers...)
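
Side note on the deduplication scheme this patch switches to, since it is easy to misread: folder_watch() concatenates every filename in the download folder into the single global string folder_content, and item_dl() skips any source whose basename, minus its 4-character extension, already appears as a substring of that string. Dropping the extension means a leftover partial download such as 1612801753931.jpgebpmo83j.tmp (the .tmp files committed above look like such wget temporaries) still matches a finished 1612801753931.jpg. Below is a minimal standalone sketch of the idea; the already_downloaded helper, the sample URL, and the two-line usage at the bottom are illustrative, not part of the patch:

    from os import listdir
    from re import findall

    folder_content = ""  # global: every filename in the download folder, concatenated

    def folder_watch(folder):
        '''Rebuild the global folder_content from the folder's current listing.'''
        global folder_content
        folder_content = "".join(listdir(folder))

    def already_downloaded(source):
        '''Hypothetical helper: has the image named in this source URL been fetched?'''
        imagename = findall(r"[^\/]*$", source)[0]  # basename of the URL
        # [:-4] drops ".jpg", so partial files like "name.jpgXXXX.tmp" also match
        return imagename[:-4] in folder_content

    folder_watch("downloads")  # assumes a local "downloads" folder exists
    print(already_downloaded("i.4cdn.org/wg/1612801753931.jpg"))

One caveat on the design: because the names are joined with no separator, a basename could in principle match across the boundary of two adjacent filenames; joining with a newline instead would rule that out.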