Update '4chanthreadfinder.py'

Added support for multiple keywords
This commit is contained in:
justine 2021-02-10 14:26:26 +00:00
parent a97067d452
commit 52fdd4f4b1

View File

@ -38,7 +38,7 @@ def getArgs():
parser = argparse.ArgumentParser()
parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True)
parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'", action="store", type=str, required=True)
parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'. Argument can be used multiple times", action='append', required=True)
parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
#Creating the args object
@ -51,7 +51,7 @@ def thread_finder(homepage, keyword):
returns a list of all the threads where op said keyword on 4chan homepage
Args:
- homepage: bs4 soup object containing html from the homepage of the board
- keyword : any single word
- keyword : list of expressions to look for
Returns:
- hrefs : all the references to matching threads. They must be appended to homepage to work
'''
@ -62,7 +62,9 @@ def thread_finder(homepage, keyword):
for thread in threads:
texts = thread.findAll('blockquote', {'class' : 'postMessage'})
for text in texts:
if keyword.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower():
for word in keyword:
if word.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower():
print(f"Found {word}")
links = thread.findAll('a', {'title': 'Reply to this post'})
for link in links:
hrefs.append(f"{link['href']}")
@ -190,9 +192,13 @@ def dl_threads(folder, url):
subsoup = html_get(f"{url}{href}")
subsources = scraper(subsoup)
folder_watch(folder)
try:
item_dl(subsources, folder)
except HTTPSConnectionPool as ConnErr:
print(f"Got Error {ConErr}, pipes must be clogged lulz")
except Exception as e:
print(f"Houston, we had a problem: \n{e}")
print(f"Houston, we had a problem with {url} and {folder}: \n{e}")
@ -208,6 +214,7 @@ sources = scraper(soup)
folder_create(folder)
folder_content = ""
print("Lurking...")
#item_dl(sources, folder)
#Dling all threads found
@ -221,3 +228,5 @@ else:
while True:
folder_watch(folder)
dl_threads(folder, url)
sleep(60)
print('Sayonara')