Update '4chanthreadfinder.py'

Added multiple keywords support
This commit is contained in:
justine 2021-02-10 14:26:26 +00:00
parent a97067d452
commit 52fdd4f4b1

View File

@ -38,7 +38,7 @@ def getArgs():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True) parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True)
parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True) parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'", action="store", type=str, required=True) parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'. Argument can be used multiple times", action='append', required=True)
parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true") parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
#Creating the args object #Creating the args object
@ -51,7 +51,7 @@ def thread_finder(homepage, keyword):
returns a list of all the threads where op said keyword on 4chan homepage returns a list of all the threads where op said keyword on 4chan homepage
Args: Args:
- homepage: bs4 soup object containing html from the homepage of the board - homepage: bs4 soup object containing html from the homepage of the board
- keyword : any single word - keyword : list of expressions to look for
Returns: Returns:
- hrefs : all the references to matching threads. They must be appended to homepage to work - hrefs : all the references to matching threads. They must be appended to homepage to work
''' '''
@ -62,7 +62,9 @@ def thread_finder(homepage, keyword):
for thread in threads: for thread in threads:
texts = thread.findAll('blockquote', {'class' : 'postMessage'}) texts = thread.findAll('blockquote', {'class' : 'postMessage'})
for text in texts: for text in texts:
if keyword.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower(): for word in keyword:
if word.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower():
print(f"Found {word}")
links = thread.findAll('a', {'title': 'Reply to this post'}) links = thread.findAll('a', {'title': 'Reply to this post'})
for link in links: for link in links:
hrefs.append(f"{link['href']}") hrefs.append(f"{link['href']}")
@ -190,9 +192,13 @@ def dl_threads(folder, url):
subsoup = html_get(f"{url}{href}") subsoup = html_get(f"{url}{href}")
subsources = scraper(subsoup) subsources = scraper(subsoup)
folder_watch(folder) folder_watch(folder)
try:
item_dl(subsources, folder) item_dl(subsources, folder)
except HTTPSConnectionPool as ConnErr:
print(f"Got Error {ConnErr}, pipes must be clogged lulz")
except Exception as e: except Exception as e:
print(f"Houston, we had a problem: \n{e}") print(f"Houston, we had a problem with {url} and {folder}: \n{e}")
@ -208,6 +214,7 @@ sources = scraper(soup)
folder_create(folder) folder_create(folder)
folder_content = "" folder_content = ""
print("Lurking...")
#item_dl(sources, folder) #item_dl(sources, folder)
#Dling all threads found #Dling all threads found
@ -221,3 +228,5 @@ else:
while True: while True:
folder_watch(folder) folder_watch(folder)
dl_threads(folder, url) dl_threads(folder, url)
sleep(60)
print('Sayonara')