diff --git a/4chanthreadfinder.py b/4chanthreadfinder.py index 268f1cc..6da1990 100755 --- a/4chanthreadfinder.py +++ b/4chanthreadfinder.py @@ -7,9 +7,11 @@ from bs4 import BeautifulSoup import mechanicalsoup import wget from os import mkdir, listdir +from sys import path from re import findall from time import sleep from threading import Thread +import logging ''' ############## 4chan thread scrapper ################ @@ -39,6 +41,7 @@ def getArgs(): parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True) parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True) parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'. Argument can be used multiple times", action='append', required=True) + parser.add_argument("-l", "--logfile", help="Name of the logfile. Please provide a name that is not already in use.", type=str, required=False) parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true") #Creating the args object @@ -170,12 +173,13 @@ def folder_watch(folder): folder_content += i -def dl_threads(folder, url): +def dl_threads(folder, url, log_enabled): ''' Constantly download... 
Args: - folder: folder to dl into - url : board to watch + - log_enabled : Set True if logging lib is used ''' try: @@ -194,9 +198,14 @@ def dl_threads(folder, url): folder_watch(folder) try: item_dl(subsources, folder) + if log_enabled: + logging.info(f"Downloaded {url}{href}") + sleep(2) except HTTPSConnectionPool as ConnErr: - print(f"Got Error {ConErr}, pipes must be clogged lulz") - + if log_enabled: + logging.error(f"Got Error {ConnErr}, pipes must be clogged lulz") + else: + print(f"Got Error {ConnErr}, pipes must be clogged lulz") except Exception as e: print(f"Houston, we had a problem with {url} and {folder}: \n{e}") @@ -206,6 +215,20 @@ def dl_threads(folder, url): args = getArgs() folder = args.folder keyword = args.keyword +if args.logfile: + logfile = args.logfile + + #Creating Logfile + logging.basicConfig(\ + format='%(asctime)s %(levelname)-8s %(message)s',\ + filename=f"{path[0]}/{args.logfile}",\ + level = logging.DEBUG,\ + datefmt='%Y-%m-%d %H:%M:%S'\ + ) + log_enabled = True +else: + log_enabled = False + url = args.url soup = html_get(url) @@ -223,7 +246,7 @@ print("Lurking...") if not args.constant: for href in hrefs: folder_watch(folder) - dl_threads(folder, url) + dl_threads(folder, url, log_enabled) else: while True: folder_watch(folder)