Ajout threading

This commit is contained in:
Justine 2021-02-07 23:35:12 +01:00
parent f7dcea7259
commit a117554a04
2 changed files with 27 additions and 17 deletions

View File

@ -118,7 +118,7 @@ def item_dl(sources, dlfolder):
dir_content = listdir(dlfolder)
for index,source in enumerate(sources):
imagename = findall(r"[^\/]*$", source)[0]
if imagename or f"{imagename} (01).jpg" in dir_content:
if imagename or f"{imagename} (1).jpg" in dir_content:
sources.pop(index)
print(f"Found duplicate {imagename}")
@ -136,22 +136,22 @@ def constant_dl(folder, url):
- folder: folder to dl into
- url : board to watch
'''
while True:
sleep(2)
soup = html_get(url)
hrefs = thread_finder(soup, keyword)
sources = scraper(soup)
#item_dl(sources, folder)
sleep(2)
soup = html_get(url)
hrefs = thread_finder(soup, keyword)
sources = scraper(soup)
#item_dl(sources, folder)
#Dling all threads found
#Dling all threads found
#oneshot
for href in hrefs:
print(f"going after {url}{href}")
subsoup = html_get(f"{url}{href}")
subsources = scraper(subsoup)
print(subsources)
item_dl(subsources, folder)
#oneshot
for href in hrefs:
print(f"going after {url}{href}")
subsoup = html_get(f"{url}{href}")
subsources = scraper(subsoup)
print(subsources)
item_dl(subsources, folder)
@ -188,5 +188,4 @@ else:
for thread in thread_objects:
thread.start()
#constant

View File

@ -28,7 +28,7 @@ Install dependencies:
python3 -m pip install beautifulsoup4 mechanicalsoup wget --user
```
Use:
Use (oneshot):
```
./4chanthreadfinder.py -u https://boards.4chan.org/b/ -f ./downloads/thread -k 'ylyl thread'
@ -38,3 +38,14 @@ Use:
* -f : folder where you want to download all pictures
* -k : keyword or key phrase to search for (a single word works best)
Use (constant, multi-threaded):
```
./4chanthreadfinder.py -u https://boards.4chan.org/b/ -f ./downloads/threads -k 'thread' -c -t 3
```
* -u : URL of the page
* -f : folder where you want to download all pictures
* -k : keyword or key phrase to search for (a single word works best)
* -c : constant mode: enables continuous downloading
* -t 3 : number of threads. Here, 3 threads keep running to download continuously