No more threads, too complex

Justine 2021-02-08 17:38:31 +01:00
parent 85a798b311
commit 913208274b
13 changed files with 50 additions and 36 deletions

11 binary image files added (previews not shown): 248 KiB, 272 KiB, 408 KiB, 344 KiB, 376 KiB, 408 KiB, 920 KiB, 24 KiB, 24 KiB, 24 KiB, 56 KiB.

View File

@@ -40,7 +40,6 @@ def getArgs():
     parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
     parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'", action="store", type=str, required=True)
     parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
-    parser.add_argument("-t", "--threads", help = "Number of threads in case of constant run, defaults to 2", action="store", type=int, required=False)
 
     #Creating the args object
     args=parser.parse_args()
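
For reference, the argument surface left after this removal is the four flags visible here, plus a url flag implied by `url = args.url` further down. A self-contained sketch of the remaining parser (the `-u/--url` spelling and its help text are assumptions, not shown in this hunk):

import argparse

parser = argparse.ArgumentParser()
# Assumed flag: only `args.url` is visible later in this commit.
parser.add_argument("-u", "--url", help = "Board to watch", action="store", type=str, required=True)
parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'", action="store", type=str, required=True)
parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
args = parser.parse_args()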
@@ -116,51 +115,75 @@ def item_dl(sources, dlfolder):
     '''
     Download all items in the sources list to folder dlfolder, which we try to create
     Args:
         - sources : a list of URLs
+        - global folder_content : see folder_watch()
     '''
-    #Making folder
+    global folder_content
     try:
+        #Making folder
         mkdir(dlfolder)
     except FileExistsError:
         print(f"{dlfolder} already exists, not creating")
     #Deduplicating
     imagenames = []
-    dir_content = listdir(dlfolder)
     for source in sources:
         fullsource = "http://" + source
         imagename = findall(r"[^\/]*$", source)[0]
-        if imagename[:-4] not in str(dir_content):
+        if imagename[:-4] not in folder_content:
             name = wget.download(fullsource, out=dlfolder)
             print(f"{name} downloaded")
     return True
 
-def constant_dl(folder, url):
+def folder_watch(folder):
+    '''
+    Watch the content of a folder and rebuild its content string.
+    Content is a string containing the names of all the elements.
+    Args:
+        - folder : folder to watch
+    Sets:
+        - global folder_content : said string, containing the names of all the files in the folder
+    '''
+    global folder_content
+    folder_list = listdir(folder)
+    folder_content = ""
+    for i in folder_list:
+        folder_content += i
+
+def dl_threads(folder, url):
     '''
     Constantly download...
     Args:
         - folder: folder to dl into
         - url : board to watch
     '''
     try:
-        while True:
-            sleep(2)
-            soup = html_get(url)
-            hrefs = thread_finder(soup, keyword)
-            sources = scraper(soup)
-            #item_dl(sources, folder)
+        sleep(2)
+        soup = html_get(url)
+        hrefs = thread_finder(soup, keyword)
+        sources = scraper(soup)
+        #item_dl(sources, folder)
 
         #Dling all threads found
         #oneshot
         for href in hrefs:
             print(f"going after {url}{href}")
             subsoup = html_get(f"{url}{href}")
             subsources = scraper(subsoup)
-            print(subsources)
+            folder_watch(folder)
             item_dl(subsources, folder)
     except Exception as e:
         print(f"Houston, we had a problem: \n{e}")
@@ -170,15 +193,12 @@ def constant_dl(folder, url):
 args = getArgs()
 folder = args.folder
 keyword = args.keyword
-if args.threads:
-    threadnumber = args.threads
-else:
-    threadnumber = 2
 url = args.url
 
 soup = html_get(url)
 hrefs = thread_finder(soup, keyword)
 sources = scraper(soup)
+folder_content = ""
 #item_dl(sources, folder)
 
 #Dling all threads found
@@ -186,16 +206,11 @@ sources = scraper(soup)
 
 #oneshot
 if not args.constant:
     for href in hrefs:
-        print(f"going after {url}{href}")
-        subsoup = html_get(f"{url}{href}")
-        subsources = scraper(subsoup)
-        print(subsources)
-        item_dl(subsources, folder)
+        folder_watch(folder)
+        dl_threads(folder, url)
 else:
-    thread_objects = []
-    for i in range (1, threadnumber):
-        thread_objects.append(Thread(target=constant_dl, args=(folder, url)))
-    for thread in thread_objects:
-        thread.start()
+    while True:
+        folder_watch(folder)
+        dl_threads(folder, url)
 
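Dropping the `Thread` pool also removes the duplicate downloads the old README todo complained about: every thread ran the same check-then-download sequence against shared state, and because the "already on disk?" check happens well before the file lands, several threads could pass the check for the same image. A hypothetical, self-contained illustration of that race (not code from this repo):

from threading import Thread
from time import sleep

downloaded = set()  # shared state; nothing serializes access to it

def fake_dl(name):
    if name not in downloaded:       # check...
        sleep(0.1)                   # ...simulating the slow network download
        downloaded.add(name)         # act: recorded only after the download
        print(f"{name} downloaded")

# Three watcher threads race past the check before any of them records
# the file, so this typically prints "img.jpg downloaded" three times.
threads = [Thread(target=fake_dl, args=("img.jpg",)) for _ in range(3)]
for t in threads:
    t.start()
for t in threads:
    t.join()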

View File

@@ -50,7 +50,6 @@ Use (constant, multi-threaded):
 
 ##Todo
 * Filter by filetype
-* Multi-threaded not really working, -t 2 gives one thread and many threads will cause duplicates
 * Use a try / catch when dling since some threads go 404 and it gives us a crash
 * Make a pretty website with some keywords running in the bg, making for some nice public folders (wallpapers...)
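
For the remaining 404 todo, one possible shape (a sketch, not part of this commit): the PyPI `wget` package used by the script downloads via urllib, which raises `HTTPError` when a thread has gone 404, so each download can be guarded individually:

from urllib.error import HTTPError

import wget

def safe_dl(fullsource, dlfolder):
    # Skip a dead (404) source instead of letting it crash the whole run.
    try:
        name = wget.download(fullsource, out=dlfolder)
        print(f"{name} downloaded")
    except HTTPError as e:
        print(f"skipping {fullsource}: HTTP {e.code}")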