Plus de threads, trop complexe
							
								
								
									
										
											BIN
										
									
								
								1612800279774.jpgg1ir4h9g.tmp
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 248 KiB | 
							
								
								
									
										
											BIN
										
									
								
								1612800847218.jpgz00i3nd3.tmp
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 272 KiB | 
							
								
								
									
										
											BIN
										
									
								
								1612801102502.jpg7bewagr3.tmp
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 408 KiB | 
							
								
								
									
										
											BIN
										
									
								
								1612801206104.jpgn8w76bsu.tmp
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 344 KiB | 
							
								
								
									
										
											BIN
										
									
								
								1612801259880.jpg66nlexsl.tmp
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 376 KiB | 
							
								
								
									
										
											BIN
										
									
								
								1612801259880.jpg6__c8xd4.tmp
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 408 KiB | 
							
								
								
									
										
											BIN
										
									
								
								1612801442460.jpguqt7rcl2.tmp
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 920 KiB | 
							
								
								
									
										
											BIN
										
									
								
								1612801753931.jpgebpmo83j.tmp
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 24 KiB | 
							
								
								
									
										
											BIN
										
									
								
								1612801753931.jpgr25ymlkk.tmp
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 24 KiB | 
							
								
								
									
										
											BIN
										
									
								
								1612801818460.jpgyn5eonf9.tmp
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 24 KiB | 
							
								
								
									
										
											BIN
										
									
								
								1612801951857.jpgm0h3hv2r.tmp
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 56 KiB | 
| @ -40,7 +40,6 @@ def getArgs(): | ||||
|     parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True) | ||||
|     parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'", action="store", type=str, required=True) | ||||
|     parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true") | ||||
|     parser.add_argument("-t", "--threads", help = "Number of threads in case of constant run, defaults to 2", action="store", type=int, required=False) | ||||
|  | ||||
|     #Creating the args object | ||||
|     args=parser.parse_args() | ||||
| @ -116,51 +115,75 @@ def item_dl(sources, dlfolder): | ||||
|     ''' | ||||
|     Download all items in the sources list to folder dlfolder, which we try to create" | ||||
|     Args: | ||||
|     - sources : a list of URLs | ||||
|     - sources : a list of URLsi | ||||
|     - global folder_content : see folder_watch() | ||||
|  | ||||
|     ''' | ||||
|     #Making folder | ||||
|      | ||||
|     global folder_content | ||||
|      | ||||
|     try: | ||||
|     #Making folder | ||||
|         mkdir(dlfolder) | ||||
|     except FileExistsError: | ||||
|         print(f"{dlfolder} already exists, not creating") | ||||
|  | ||||
|     #Deduplicating | ||||
|     imagenames = [] | ||||
|     dir_content = listdir(dlfolder) | ||||
|  | ||||
|     for source in sources: | ||||
|         fullsource = "http://" + source | ||||
|         imagename = findall(r"[^\/]*$", source)[0] | ||||
|         if imagename[:-4] not in str(dir_content): | ||||
|         if imagename[:-4] not in folder_content: | ||||
|             name = wget.download(fullsource, out=dlfolder) | ||||
|             print(f"{name} downloaded") | ||||
|  | ||||
|     return True | ||||
|  | ||||
| def constant_dl(folder, url): | ||||
| def folder_watch(folder): | ||||
|     ''' | ||||
|     Watch for the content of a folder and return its content. | ||||
|     Content is a string containing all the names of all the elements. | ||||
|     Args: | ||||
|     - folder : folder to watch | ||||
|     - global folder_content : see folder_watch() | ||||
|     Returns: | ||||
|     folder_content : said string, containing all the names of all the files in the folder | ||||
|     ''' | ||||
|  | ||||
|     global folder_content | ||||
|  | ||||
|     folder_list = listdir(folder) | ||||
|     folder_content = ""  | ||||
|  | ||||
|     for i in folder_list: | ||||
|         folder_content += i | ||||
|  | ||||
|  | ||||
| def dl_threads(folder, url): | ||||
|     ''' | ||||
|     Constantly download... | ||||
|     Args:  | ||||
|     - folder: folder to dl into | ||||
|     - url : board to watch | ||||
|     ''' | ||||
|  | ||||
|     try: | ||||
|         while True: | ||||
|             sleep(2) | ||||
|             soup = html_get(url) | ||||
|             hrefs = thread_finder(soup, keyword) | ||||
|             sources = scraper(soup) | ||||
|             #item_dl(sources, folder) | ||||
|         sleep(2) | ||||
|         soup = html_get(url) | ||||
|         hrefs = thread_finder(soup, keyword) | ||||
|         sources = scraper(soup) | ||||
|         #item_dl(sources, folder) | ||||
|  | ||||
|             #Dling all threads found | ||||
|         #Dling all threads found | ||||
|  | ||||
|             #oneshot | ||||
|             for href in hrefs: | ||||
|                 print(f"going after {url}{href}") | ||||
|                 subsoup = html_get(f"{url}{href}") | ||||
|                 subsources = scraper(subsoup) | ||||
|                 print(subsources) | ||||
|                 item_dl(subsources, folder) | ||||
|         #oneshot | ||||
|         for href in hrefs: | ||||
|             print(f"going after {url}{href}") | ||||
|             subsoup = html_get(f"{url}{href}") | ||||
|             subsources = scraper(subsoup) | ||||
|             folder_watch(folder) | ||||
|             item_dl(subsources, folder) | ||||
|     except Exception as e: | ||||
|         print(f"Houston, we had a problem: \n{e}") | ||||
|  | ||||
| @ -170,15 +193,12 @@ def constant_dl(folder, url): | ||||
| args = getArgs() | ||||
| folder = args.folder | ||||
| keyword = args.keyword | ||||
| if args.threads: | ||||
|     threadnumber = args.threads | ||||
| else: | ||||
|     threadnumber = 2 | ||||
|  | ||||
| url = args.url | ||||
| soup = html_get(url) | ||||
| hrefs = thread_finder(soup, keyword) | ||||
| sources = scraper(soup) | ||||
| folder_content = "" | ||||
| #item_dl(sources, folder) | ||||
|  | ||||
| #Dling all threads found | ||||
| @ -186,16 +206,11 @@ sources = scraper(soup) | ||||
| #oneshot | ||||
| if not args.constant: | ||||
|     for href in hrefs: | ||||
|         print(f"going after {url}{href}") | ||||
|         subsoup = html_get(f"{url}{href}") | ||||
|         subsources = scraper(subsoup) | ||||
|         print(subsources) | ||||
|         item_dl(subsources, folder) | ||||
|         folder_watch(folder) | ||||
|         dl_threads(folder_url) | ||||
| else: | ||||
|     thread_objects = [] | ||||
|     for i in range (1, threadnumber): | ||||
|         thread_objects.append(Thread(target=constant_dl, args=(folder, url))) | ||||
|     for thread in thread_objects: | ||||
|         thread.start() | ||||
|  | ||||
|     while True: | ||||
|             folder_watch(folder) | ||||
|             dl_threads(folder, url) | ||||
|              | ||||
|  | ||||
|  | ||||
| @ -50,7 +50,6 @@ Use (constant, multi-threaded): | ||||
|  | ||||
| ##Todo | ||||
| * Filter by filetype | ||||
| * Multi-threaded not really working, -t 2 gives one thread and many threads will cause duplicates | ||||
| * Use a try / catch when dling since some threads go 404 and it gives us a crash | ||||
| * Make a pretty website with some keywords running in the bg, making for some nice public folders (wallpapers...) | ||||
|  | ||||
|  | ||||