Update '4chanthreadfinder.py'
Added multiple keywords support
This commit is contained in:
parent
a97067d452
commit
52fdd4f4b1
@ -38,7 +38,7 @@ def getArgs():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True)
|
||||
parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
|
||||
parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'", action="store", type=str, required=True)
|
||||
parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'. Argument can be used multiple times", action='append', required=True)
|
||||
parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
|
||||
|
||||
#Creating the args object
|
||||
@ -51,7 +51,7 @@ def thread_finder(homepage, keyword):
|
||||
returns a list of all the threads where op said keyword on 4chan homepage
|
||||
Args:
|
||||
- homepage: bs4 soup object containing html from the homepage of the board
|
||||
- keyword : any single word
|
||||
- keyword : list of expressions to look for
|
||||
Returns:
|
||||
- hrefs : all the references to matching threads. They must be appended to homepage to work
|
||||
'''
|
||||
@ -62,11 +62,13 @@ def thread_finder(homepage, keyword):
|
||||
for thread in threads:
|
||||
texts = thread.findAll('blockquote', {'class' : 'postMessage'})
|
||||
for text in texts:
|
||||
if keyword.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower():
|
||||
links = thread.findAll('a', {'title': 'Reply to this post'})
|
||||
for link in links:
|
||||
hrefs.append(f"{link['href']}")
|
||||
return hrefs
|
||||
for word in keyword:
|
||||
if word.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower():
|
||||
print(f"Found {word}")
|
||||
links = thread.findAll('a', {'title': 'Reply to this post'})
|
||||
for link in links:
|
||||
hrefs.append(f"{link['href']}")
|
||||
return hrefs
|
||||
|
||||
|
||||
|
||||
@ -190,9 +192,13 @@ def dl_threads(folder, url):
|
||||
subsoup = html_get(f"{url}{href}")
|
||||
subsources = scraper(subsoup)
|
||||
folder_watch(folder)
|
||||
item_dl(subsources, folder)
|
||||
try:
|
||||
item_dl(subsources, folder)
|
||||
except HTTPSConnectionPool as ConnErr:
|
||||
print(f"Got Error {ConnErr}, pipes must be clogged lulz")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Houston, we had a problem: \n{e}")
|
||||
print(f"Houston, we had a problem with {url} and {folder}: \n{e}")
|
||||
|
||||
|
||||
|
||||
@ -208,6 +214,7 @@ sources = scraper(soup)
|
||||
folder_create(folder)
|
||||
folder_content = ""
|
||||
|
||||
print("Lurking...")
|
||||
#item_dl(sources, folder)
|
||||
|
||||
#Dling all threads found
|
||||
@ -221,3 +228,5 @@ else:
|
||||
while True:
|
||||
folder_watch(folder)
|
||||
dl_threads(folder, url)
|
||||
sleep(60)
|
||||
print('Sayonara')
|
||||
|
Reference in New Issue
Block a user