Add threading
parent bd16dc7a6e
commit f7dcea7259
@@ -8,6 +8,8 @@ import mechanicalsoup
 import wget
 from os import mkdir, listdir
 from re import findall
+from time import sleep
+from threading import Thread
 '''
 ############## 4chan thread scrapper ################
 here we look for particular threads on 4chan and dl their images
@@ -26,6 +28,8 @@ def getArgs():
     parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True)
     parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
     parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'", action="store", type=str, required=True)
+    parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
+    parser.add_argument("-t", "--threads", help = "Number of threads in case of constant run, defaults to 2", action="store", type=int, required=False)
 
     #Creating the args object
     args=parser.parse_args()
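With the two new flags, a constant run might be launched like this (a hypothetical invocation, assuming the script is saved as scraper.py):

    python3 scraper.py -u https://boards.4chan.org/b/ -f ./downloads -k 'cute thread' -c -t 4

-c switches from the one-shot sweep to the threaded watcher, and -t sets how many worker threads to spawn.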
@@ -114,7 +118,7 @@ def item_dl(sources, dlfolder):
     dir_content = listdir(dlfolder)
     for index,source in enumerate(sources):
         imagename = findall(r"[^\/]*$", source)[0]
-        if imagename in dir_content:
+        if imagename or f"{imagename} (01).jpg" in dir_content:
             sources.pop(index)
             print(f"Found duplicate {imagename}")
 
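Note: the rewritten condition parses as `imagename or (f"{imagename} (01).jpg" in dir_content)`, and a non-empty `imagename` is always truthy, so every image gets flagged as a duplicate and popped. A sketch of the presumably intended check:

    if imagename in dir_content or f"{imagename} (01).jpg" in dir_content:
        sources.pop(index)
        print(f"Found duplicate {imagename}")

Popping from `sources` while enumerating it also skips the element that follows each removal; building a filtered list (e.g. a comprehension over `sources`) would avoid both pitfalls.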
@@ -123,11 +127,44 @@ def item_dl(sources, dlfolder):
         name = wget.download(fullsource, out=dlfolder)
         print(f"{name} downloaded")
 
+    return True
+
+def constant_dl(folder, url):
+    '''
+    Constantly download...
+    Args:
+    - folder: folder to dl into
+    - url : board to watch
+    '''
+
+    sleep(2)
+    soup = html_get(url)
+    hrefs = thread_finder(soup, keyword)
+    sources = scraper(soup)
+    #item_dl(sources, folder)
+
+    #Dling all threads found
+
+    #oneshot
+    for href in hrefs:
+        print(f"going after {url}{href}")
+        subsoup = html_get(f"{url}{href}")
+        subsources = scraper(subsoup)
+        print(subsources)
+        item_dl(subsources, folder)
+
+
+
 
 #Getting main elements
 args = getArgs()
 folder = args.folder
 keyword = args.keyword
+if args.threads:
+    threadnumber = args.threads
+else:
+    threadnumber = 2
+
 url = args.url
 soup = html_get(url)
 hrefs = thread_finder(soup, keyword)
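Note: despite its name and docstring, `constant_dl` performs a single pass: one `sleep(2)`, one board fetch, one download sweep (it also reads `keyword` from module scope instead of taking it as a parameter). If each worker thread is meant to keep watching the board, the body presumably needs a loop; a minimal sketch under that assumption:

    def constant_dl(folder, url):
        '''Poll the board forever, downloading images from matching threads.'''
        while True:
            sleep(2)                           # pause between passes
            soup = html_get(url)
            for href in thread_finder(soup, keyword):
                subsoup = html_get(f"{url}{href}")
                item_dl(scraper(subsoup), folder)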
@@ -136,9 +173,20 @@ sources = scraper(soup)
 
 #Dling all threads found
 
-for href in hrefs:
-    print(f"going after {url}{href}")
-    subsoup = html_get(f"{url}{href}")
-    subsources = scraper(subsoup)
-    print(subsources)
-    item_dl(subsources, folder)
+#oneshot
+if not args.constant:
+    for href in hrefs:
+        print(f"going after {url}{href}")
+        subsoup = html_get(f"{url}{href}")
+        subsources = scraper(subsoup)
+        print(subsources)
+        item_dl(subsources, folder)
+else:
+    thread_objects = []
+    for i in range (1, threadnumber):
+        thread_objects.append(Thread(target=constant_dl, args=(folder, url)))
+    for thread in thread_objects:
+        thread.start()
+
+#constant
+
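Note: `range(1, threadnumber)` yields `threadnumber - 1` values, so the default of 2 starts a single worker, and the threads are started but never joined. A sketch of the presumably intended dispatch:

    thread_objects = [Thread(target=constant_dl, args=(folder, url))
                      for _ in range(threadnumber)]
    for thread in thread_objects:
        thread.start()
    for thread in thread_objects:
        thread.join()    # wait for all workers to finish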