Update '4chanthreadfinder.py'

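Added optional file logging: a new -l/--logfile argument configures the logging module, and dl_threads takes a log_enabled flag to log downloads and connection errors.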
This commit is contained in:
justine 2021-02-19 10:52:22 +00:00
parent 52fdd4f4b1
commit 77c20d67f1

View File

@ -7,9 +7,11 @@ from bs4 import BeautifulSoup
import mechanicalsoup import mechanicalsoup
import wget import wget
from os import mkdir, listdir from os import mkdir, listdir
from sys import path
from re import findall from re import findall
from time import sleep from time import sleep
from threading import Thread from threading import Thread
import logging
''' '''
############## 4chan thread scrapper ################ ############## 4chan thread scrapper ################
@ -39,6 +41,7 @@ def getArgs():
parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True) parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True)
parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True) parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'. Argument can be used multiple times", action='append', required=True) parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'. Argument can be used multiple times", action='append', required=True)
parser.add_argument("-l", "--logfile", help="Name of the logfile. Please provide a name that is not already in use.", type=str, required=False)
parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true") parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
#Creating the args object #Creating the args object
@ -170,12 +173,13 @@ def folder_watch(folder):
folder_content += i folder_content += i
def dl_threads(folder, url): def dl_threads(folder, url, log_enabled):
''' '''
Constantly download... Constantly download...
Args: Args:
- folder: folder to dl into - folder: folder to dl into
- url : board to watch - url : board to watch
- log_enabled : Set True if logging lib is used
''' '''
try: try:
@ -194,9 +198,14 @@ def dl_threads(folder, url):
folder_watch(folder) folder_watch(folder)
try: try:
item_dl(subsources, folder) item_dl(subsources, folder)
if log_enabled:
logging.info(f"Downloaded {url}{href}")
sleep(2)
except HTTPSConnectionPool as ConnErr: except HTTPSConnectionPool as ConnErr:
print(f"Got Error {ConErr}, pipes must be clogged lulz") if log_enabled:
logging.error(f"Got Error {ConErr}, pipes must be clogged lulz")
else:
print(f"Got Error {ConErr}, pipes must be clogged lulz")
except Exception as e: except Exception as e:
print(f"Houston, we had a problem with {url} and {folder}: \n{e}") print(f"Houston, we had a problem with {url} and {folder}: \n{e}")
@ -206,6 +215,20 @@ def dl_threads(folder, url):
args = getArgs() args = getArgs()
folder = args.folder folder = args.folder
keyword = args.keyword keyword = args.keyword
if args.logfile:
logfile = args.logfile
#Creating Logfile
logging.basicConfig(\
format='%(asctime)s %(levelname)-8s %(message)s',\
filename=f"{path[0]}/{args.logfile}",\
level = logging.DEBUG,\
datefmt='%Y-%m-%d %H:%M:%S'\
)
log_enabled = True
else:
log_enabled = False
url = args.url url = args.url
soup = html_get(url) soup = html_get(url)
@ -223,7 +246,7 @@ print("Lurking...")
if not args.constant: if not args.constant:
for href in hrefs: for href in hrefs:
folder_watch(folder) folder_watch(folder)
dl_threads(folder, url) dl_threads(folder, url, log_enabled)
else: else:
while True: while True:
folder_watch(folder) folder_watch(folder)