Ajout threading

This commit is contained in:
Justine 2021-02-07 23:35:12 +01:00
parent f7dcea7259
commit a117554a04
2 changed files with 27 additions and 17 deletions

View File

@ -118,7 +118,7 @@ def item_dl(sources, dlfolder):
dir_content = listdir(dlfolder) dir_content = listdir(dlfolder)
for index,source in enumerate(sources): for index,source in enumerate(sources):
imagename = findall(r"[^\/]*$", source)[0] imagename = findall(r"[^\/]*$", source)[0]
if imagename or f"{imagename} (01).jpg" in dir_content: if imagename or f"{imagename} (1).jpg" in dir_content:
sources.pop(index) sources.pop(index)
print(f"Found duplicate {imagename}") print(f"Found duplicate {imagename}")
@ -136,22 +136,22 @@ def constant_dl(folder, url):
- folder: folder to dl into - folder: folder to dl into
- url : board to watch - url : board to watch
''' '''
while True:
sleep(2)
soup = html_get(url)
hrefs = thread_finder(soup, keyword)
sources = scraper(soup)
#item_dl(sources, folder)
sleep(2) #Dling all threads found
soup = html_get(url)
hrefs = thread_finder(soup, keyword)
sources = scraper(soup)
#item_dl(sources, folder)
#Dling all threads found #oneshot
for href in hrefs:
#oneshot print(f"going after {url}{href}")
for href in hrefs: subsoup = html_get(f"{url}{href}")
print(f"going after {url}{href}") subsources = scraper(subsoup)
subsoup = html_get(f"{url}{href}") print(subsources)
subsources = scraper(subsoup) item_dl(subsources, folder)
print(subsources)
item_dl(subsources, folder)
@ -188,5 +188,4 @@ else:
for thread in thread_objects: for thread in thread_objects:
thread.start() thread.start()
#constant

View File

@ -28,7 +28,7 @@ Install dependencies:
python3 -m pip install beautifulsoup4 mechanicalsoup wget --user python3 -m pip install beautifulsoup4 mechanicalsoup wget --user
``` ```
Use: Use (oneshot):
``` ```
./4chanthreadfinder.py -u https://boards.4chan.org/b/ -f ./downloads/thread -k 'ylyl thread' ./4chanthreadfinder.py -u https://boards.4chan.org/b/ -f ./downloads/thread -k 'ylyl thread'
@ -38,3 +38,14 @@ Use:
* -f : folder where you want to download all pictures * -f : folder where you want to download all pictures
* -k : keyword or keyphrase to search (better use a single word !) * -k : keyword or keyphrase to search (better use a single word !)
Use (constant, multi-threaded):
```
./4chanthreadfinder.py -u https://boards.4chan.org/b/ -f ./downloads/threads -k 'thread' -c -t 3
```
* -u : URL of the page
* -f : folder where you want to download all pictures
* -k : keyword or key phrase to search for (a single word works best)
* -c : constant mode: enables continuous downloading
* -t 3 : number of threads. Here, 3 threads keep running to download continuously