Update '4chanthreadfinder.py'

Added support for multiple keywords
This commit is contained in:
justine 2021-02-10 14:26:26 +00:00
parent a97067d452
commit 52fdd4f4b1

View File

@ -38,7 +38,7 @@ def getArgs():
parser = argparse.ArgumentParser()
parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True)
parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'", action="store", type=str, required=True)
parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'. Argument can be used multiple times", action='append', required=True)
parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
#Creating the args object
@ -51,7 +51,7 @@ def thread_finder(homepage, keyword):
returns a list of all the threads where op said keyword on 4chan homepage
Args:
- homepage: bs4 soup object containing html from the homepage of the board
- keyword : any single word
- keyword : list of expressions to look for
Returns:
- hrefs : all the references to matching threads. They must be appended to homepage to work
'''
@ -62,7 +62,9 @@ def thread_finder(homepage, keyword):
for thread in threads:
texts = thread.findAll('blockquote', {'class' : 'postMessage'})
for text in texts:
if keyword.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower():
for word in keyword:
if word.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower():
print(f"Found {word}")
links = thread.findAll('a', {'title': 'Reply to this post'})
for link in links:
hrefs.append(f"{link['href']}")
@ -190,9 +192,13 @@ def dl_threads(folder, url):
subsoup = html_get(f"{url}{href}")
subsources = scraper(subsoup)
folder_watch(folder)
try:
item_dl(subsources, folder)
except HTTPSConnectionPool as ConnErr:
print(f"Got Error {ConErr}, pipes must be clogged lulz")
except Exception as e:
print(f"Houston, we had a problem: \n{e}")
print(f"Houston, we had a problem with {url} and {folder}: \n{e}")
@ -208,6 +214,7 @@ sources = scraper(soup)
folder_create(folder)
folder_content = ""
print("Lurking...")
#item_dl(sources, folder)
#Dling all threads found
@ -221,3 +228,5 @@ else:
while True:
folder_watch(folder)
dl_threads(folder, url)
sleep(60)
print('Sayonara')