diff --git a/4chanthreadfinder.py b/4chanthreadfinder.py
index d3b49de..268f1cc 100755
--- a/4chanthreadfinder.py
+++ b/4chanthreadfinder.py
@@ -38,7 +38,7 @@ def getArgs():
     parser = argparse.ArgumentParser()
     parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True)
     parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
-    parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'", action="store", type=str, required=True)
+    parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'. Can be passed multiple times", action='append', required=True)
     parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
 
     #Creating the args object
@@ -51,7 +51,7 @@ def thread_finder(homepage, keyword):
     returns a list of all the threads where op said keyword on 4chan homepage
     Args:
     - homepage: bs4 soup object containing html from the homepage of the board
-    - keyword : any single word
+    - keyword : list of expressions to look for
     Returns:
     - hrefs : all the references to matching threads. They must be appended to homepage to work
     '''
@@ -62,11 +62,13 @@ def thread_finder(homepage, keyword):
     for thread in threads:
         texts = thread.findAll('blockquote', {'class' : 'postMessage'})
         for text in texts:
-            if keyword.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower():
-                links = thread.findAll('a', {'title': 'Reply to this post'})
-                for link in links:
-                    hrefs.append(f"{link['href']}")
-    return hrefs
+            for word in keyword:
+                if word.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower():
+                    print(f"Found {word}")
+                    links = thread.findAll('a', {'title': 'Reply to this post'})
+                    for link in links:
+                        hrefs.append(f"{link['href']}")
+    return hrefs
@@ -190,9 +192,13 @@ def dl_threads(folder, url):
             subsoup = html_get(f"{url}{href}")
             subsources = scraper(subsoup)
             folder_watch(folder)
-            item_dl(subsources, folder)
+            try:
+                item_dl(subsources, folder)
+            except requests.exceptions.ConnectionError as conn_err:
+                print(f"Got Error {conn_err}, pipes must be clogged lulz")
+
     except Exception as e:
-        print(f"Houston, we had a problem: \n{e}")
+        print(f"Houston, we had a problem with {url} and {folder}: \n{e}")
@@ -208,6 +214,7 @@
 sources = scraper(soup)
 folder_create(folder)
 folder_content = ""
+print("Lurking...")
 
 #item_dl(sources, folder)
 #Dling all threads found
@@ -221,3 +228,5 @@ else:
     while True:
         folder_watch(folder)
         dl_threads(folder, url)
+        sleep(60)
+print('Sayonara')
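
Note on the new try/except in dl_threads: it assumes html_get() fetches pages with the requests library, in which case a dead or refused connection surfaces as requests.exceptions.ConnectionError, the exception whose message carries the familiar "HTTPSConnectionPool(host=...): Max retries exceeded" text. A minimal sketch of the pattern in isolation (fetch_quietly is a hypothetical name, not part of the script):

    import requests

    def fetch_quietly(url):
        '''Hypothetical helper: GET url, shrug off connection hiccups.'''
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            return response.content
        except requests.exceptions.ConnectionError as conn_err:
            # A dropped connection lands here and is only logged; any other
            # error still propagates to the caller's broader except block.
            print(f"Got Error {conn_err}, pipes must be clogged lulz")
            return None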
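
With action='append', -k can now be repeated to watch several phrases at once. A sample invocation (board URL and keywords are illustrative only):

    ./4chanthreadfinder.py -u https://boards.4chan.org/b/ -f ./downloads -k 'cute thread' -k 'caturday' -c

The sleep(60) added to the constant-download loop assumes sleep is already in scope (e.g. from time import sleep); if the script does not import it yet, that import needs to land in the same change.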