diff --git a/4chanthreadfinder.py b/4chanthreadfinder.py index fbb6cd9..640c47b 100755 --- a/4chanthreadfinder.py +++ b/4chanthreadfinder.py @@ -118,7 +118,7 @@ def item_dl(sources, dlfolder): dir_content = listdir(dlfolder) for index,source in enumerate(sources): imagename = findall(r"[^\/]*$", source)[0] - if imagename or f"{imagename} (01).jpg" in dir_content: + if imagename or f"{imagename} (1).jpg" in dir_content: sources.pop(index) print(f"Found duplicate {imagename}") @@ -136,22 +136,22 @@ def constant_dl(folder, url): - folder: folder to dl into - url : board to watch ''' + while True: + sleep(2) + soup = html_get(url) + hrefs = thread_finder(soup, keyword) + sources = scraper(soup) + #item_dl(sources, folder) - sleep(2) - soup = html_get(url) - hrefs = thread_finder(soup, keyword) - sources = scraper(soup) - #item_dl(sources, folder) + #Dling all threads found - #Dling all threads found - - #oneshot - for href in hrefs: - print(f"going after {url}{href}") - subsoup = html_get(f"{url}{href}") - subsources = scraper(subsoup) - print(subsources) - item_dl(subsources, folder) + #oneshot + for href in hrefs: + print(f"going after {url}{href}") + subsoup = html_get(f"{url}{href}") + subsources = scraper(subsoup) + print(subsources) + item_dl(subsources, folder) @@ -188,5 +188,4 @@ else: for thread in thread_objects: thread.start() -#constant diff --git a/README.md b/README.md index cb48db7..415630c 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ Install depedencies: python3 -m pip install beautifulsoup4 mechanicalsoup wget --user ``` -Use: +Use (oneshot): ``` ./4chanthreadfinder.py -u https://boards.4chan.org/b/ -f ./downloads/thread -k 'ylyl thread' @@ -38,3 +38,14 @@ Use: * -f : folder where you want to download all pictures * -k : keyword or keyphrase to search (better use a single word !) 
+Use (constant, multi-threaded): +``` +./4chanthreadfinder.py -u https://boards.4chan.org/b/ -f ./downloads/threads -k 'thread' -c -t 3 +``` +* -u : URL of the page +* -f : folder where you want to download all pictures +* -k : keyword or keyphrase to search (a single word works best!) +* -c : constant mode: keeps watching the board and downloading continuously +* -t 3 : number of threads. Here, 3 threads keep running to download continuously + +