Update '4chanthreadfinder.py'
Added support for multiple keywords
This commit is contained in:
parent
a97067d452
commit
52fdd4f4b1
@ -38,7 +38,7 @@ def getArgs():
|
|||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True)
|
parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True)
|
||||||
parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
|
parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
|
||||||
parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'", action="store", type=str, required=True)
|
parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'. Argument can be used multiple times", action='append', required=True)
|
||||||
parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
|
parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
|
||||||
|
|
||||||
#Creating the args object
|
#Creating the args object
|
||||||
@ -51,7 +51,7 @@ def thread_finder(homepage, keyword):
|
|||||||
returns a list of all the threads where op said keyword on 4chan homepage
|
returns a list of all the threads where op said keyword on 4chan homepage
|
||||||
Args:
|
Args:
|
||||||
- homepage: bs4 soup object containing html from the homepage of the board
|
- homepage: bs4 soup object containing html from the homepage of the board
|
||||||
- keyword : any single word
|
- keyword : list of expressions to look for
|
||||||
Returns:
|
Returns:
|
||||||
- hrefs : all the references to matching threads. They must be appended to homepage to work
|
- hrefs : all the references to matching threads. They must be appended to homepage to work
|
||||||
'''
|
'''
|
||||||
@ -62,7 +62,9 @@ def thread_finder(homepage, keyword):
|
|||||||
for thread in threads:
|
for thread in threads:
|
||||||
texts = thread.findAll('blockquote', {'class' : 'postMessage'})
|
texts = thread.findAll('blockquote', {'class' : 'postMessage'})
|
||||||
for text in texts:
|
for text in texts:
|
||||||
if keyword.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower():
|
for word in keyword:
|
||||||
|
if word.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower():
|
||||||
|
print(f"Found {word}")
|
||||||
links = thread.findAll('a', {'title': 'Reply to this post'})
|
links = thread.findAll('a', {'title': 'Reply to this post'})
|
||||||
for link in links:
|
for link in links:
|
||||||
hrefs.append(f"{link['href']}")
|
hrefs.append(f"{link['href']}")
|
||||||
@ -190,9 +192,13 @@ def dl_threads(folder, url):
|
|||||||
subsoup = html_get(f"{url}{href}")
|
subsoup = html_get(f"{url}{href}")
|
||||||
subsources = scraper(subsoup)
|
subsources = scraper(subsoup)
|
||||||
folder_watch(folder)
|
folder_watch(folder)
|
||||||
|
try:
|
||||||
item_dl(subsources, folder)
|
item_dl(subsources, folder)
|
||||||
|
except HTTPSConnectionPool as ConnErr:
|
||||||
|
print(f"Got Error {ConErr}, pipes must be clogged lulz")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Houston, we had a problem: \n{e}")
|
print(f"Houston, we had a problem with {url} and {folder}: \n{e}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -208,6 +214,7 @@ sources = scraper(soup)
|
|||||||
folder_create(folder)
|
folder_create(folder)
|
||||||
folder_content = ""
|
folder_content = ""
|
||||||
|
|
||||||
|
print("Lurking...")
|
||||||
#item_dl(sources, folder)
|
#item_dl(sources, folder)
|
||||||
|
|
||||||
#Dling all threads found
|
#Dling all threads found
|
||||||
@ -221,3 +228,5 @@ else:
|
|||||||
while True:
|
while True:
|
||||||
folder_watch(folder)
|
folder_watch(folder)
|
||||||
dl_threads(folder, url)
|
dl_threads(folder, url)
|
||||||
|
sleep(60)
|
||||||
|
print('Sayonara')
|
||||||
|
Reference in New Issue
Block a user