No more threads, too complex
BIN  1612800279774.jpgg1ir4h9g.tmp (new file, 248 KiB)
BIN  1612800847218.jpgz00i3nd3.tmp (new file, 272 KiB)
BIN  1612801102502.jpg7bewagr3.tmp (new file, 408 KiB)
BIN  1612801206104.jpgn8w76bsu.tmp (new file, 344 KiB)
BIN  1612801259880.jpg66nlexsl.tmp (new file, 376 KiB)
BIN  1612801259880.jpg6__c8xd4.tmp (new file, 408 KiB)
BIN  1612801442460.jpguqt7rcl2.tmp (new file, 920 KiB)
BIN  1612801753931.jpgebpmo83j.tmp (new file, 24 KiB)
BIN  1612801753931.jpgr25ymlkk.tmp (new file, 24 KiB)
BIN  1612801818460.jpgyn5eonf9.tmp (new file, 24 KiB)
BIN  1612801951857.jpgm0h3hv2r.tmp (new file, 56 KiB)
@@ -40,7 +40,6 @@ def getArgs():
 parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
 parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'", action="store", type=str, required=True)
 parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
-parser.add_argument("-t", "--threads", help = "Number of threads in case of constant run, defaults to 2", action="store", type=int, required=False)
 
 #Creating the args object
 args=parser.parse_args()
@@ -116,51 +115,75 @@ def item_dl(sources, dlfolder):
 '''
 Download all items in the sources list to folder dlfolder, which we try to create"
 Args:
-- sources : a list of URLs
+- sources : a list of URLsi
+- global folder_content : see folder_watch()
 
 '''
-#Making folder
+global folder_content
 
 try:
+#Making folder
 mkdir(dlfolder)
 except FileExistsError:
 print(f"{dlfolder} already exists, not creating")
 
 #Deduplicating
 imagenames = []
-dir_content = listdir(dlfolder)
 
 for source in sources:
 fullsource = "http://" + source
 imagename = findall(r"[^\/]*$", source)[0]
-if imagename[:-4] not in str(dir_content):
+if imagename[:-4] not in folder_content:
 name = wget.download(fullsource, out=dlfolder)
 print(f"{name} downloaded")
 
 return True
 
-def constant_dl(folder, url):
+def folder_watch(folder):
+'''
+Watch for the content of a folder and return its content.
+Content is a string containing all the names of all the elements.
+Args:
+- folder : folder to watch
+- global folder_content : see folder_watch()
+Returns:
+folder_content : said string, containing all the names of all the files in the folder
+'''
+
+global folder_content
+
+folder_list = listdir(folder)
+folder_content = ""
+
+for i in folder_list:
+folder_content += i
+
+
+def dl_threads(folder, url):
 '''
 Constantly download...
 Args:
 - folder: folder to dl into
 - url : board to watch
 '''
 
 try:
-while True:
-sleep(2)
-soup = html_get(url)
-hrefs = thread_finder(soup, keyword)
-sources = scraper(soup)
-#item_dl(sources, folder)
+sleep(2)
+soup = html_get(url)
+hrefs = thread_finder(soup, keyword)
+sources = scraper(soup)
+#item_dl(sources, folder)
 
 #Dling all threads found
 
 #oneshot
 for href in hrefs:
 print(f"going after {url}{href}")
 subsoup = html_get(f"{url}{href}")
 subsources = scraper(subsoup)
-print(subsources)
+folder_watch(folder)
 item_dl(subsources, folder)
 except Exception as e:
 print(f"Houston, we had a problem: \n{e}")
 
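For context on the deduplication flow this hunk introduces: folder_watch() concatenates every filename in the download folder into the global folder_content string, and item_dl() then skips any source whose basename (minus its extension) already appears in that string. Below is a rough standalone sketch of the same idea, reusing names from the diff; the already_downloaded() helper is hypothetical and not part of the commit.

```python
from os import listdir
from re import findall

folder_content = ""

def folder_watch(folder):
    # Concatenate every filename in `folder` into one global string,
    # as the new folder_watch() in the diff does.
    global folder_content
    folder_content = ""
    for name in listdir(folder):
        folder_content += name

def already_downloaded(source):
    # Hypothetical helper mirroring item_dl(): take the URL's basename,
    # strip the 4-character extension, and check it against folder_content.
    imagename = findall(r"[^\/]*$", source)[0]
    return imagename[:-4] in folder_content
```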
@@ -170,15 +193,12 @@ def constant_dl(folder, url):
 args = getArgs()
 folder = args.folder
 keyword = args.keyword
-if args.threads:
-threadnumber = args.threads
-else:
-threadnumber = 2
 
 url = args.url
 soup = html_get(url)
 hrefs = thread_finder(soup, keyword)
 sources = scraper(soup)
+folder_content = ""
 #item_dl(sources, folder)
 
 #Dling all threads found
@@ -186,16 +206,11 @@ sources = scraper(soup)
 #oneshot
 if not args.constant:
 for href in hrefs:
-print(f"going after {url}{href}")
-subsoup = html_get(f"{url}{href}")
-subsources = scraper(subsoup)
-print(subsources)
-item_dl(subsources, folder)
+folder_watch(folder)
+dl_threads(folder_url)
 else:
-thread_objects = []
-for i in range (1, threadnumber):
-thread_objects.append(Thread(target=constant_dl, args=(folder, url)))
-for thread in thread_objects:
-thread.start()
+while True:
+folder_watch(folder)
+dl_threads(folder, url)
 
 
@@ -50,7 +50,6 @@ Use (constant, multi-threaded):
 
 ##Todo
 * Filter by filetype
-* Multi-threaded not really working, -t 2 gives one thread and many threads will cause duplicates
 * Use a try / catch when dling since some threads go 404 and it gives us a crash
 * Make a pretty website with some keywords running in the bg, making for some nice public folders (wallpapers...)
 
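The remaining Todo item about a try / except around downloads (threads that go 404 currently crash the run) could be handled roughly as below. This is only an illustrative sketch, not part of the commit; the safe_download() helper is hypothetical, while wget.download(), fullsource and dlfolder follow the existing item_dl() code.

```python
import wget
from urllib.error import HTTPError, URLError

def safe_download(fullsource, dlfolder):
    # Hypothetical wrapper: skip a source whose thread went 404 between
    # scraping and downloading instead of letting the whole run crash.
    try:
        name = wget.download(fullsource, out=dlfolder)
        print(f"{name} downloaded")
        return name
    except (HTTPError, URLError) as e:
        print(f"skipping {fullsource}: {e}")
        return None
```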