Update '4chanthreadfinder.py'

Added logging
This commit is contained in:
justine 2021-02-19 10:52:22 +00:00
parent 52fdd4f4b1
commit 77c20d67f1

View File

@ -7,9 +7,11 @@ from bs4 import BeautifulSoup
import mechanicalsoup
import wget
from os import mkdir, listdir
from sys import path
from re import findall
from time import sleep
from threading import Thread
import logging
'''
############## 4chan thread scrapper ################
@ -39,6 +41,7 @@ def getArgs():
parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True)
parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'. Argument can be used multiple times", action='append', required=True)
parser.add_argument("-l", "--logfile", help="Name of the logfile. Please provide a name that is not already in use.", type=str, required=False)
parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
#Creating the args object
@ -170,12 +173,13 @@ def folder_watch(folder):
folder_content += i
def dl_threads(folder, url):
def dl_threads(folder, url, log_enabled):
'''
Constantly download...
Args:
- folder: folder to dl into
- url : board to watch
- log_enabled : Set True if logging lib is used
'''
try:
@ -194,9 +198,14 @@ def dl_threads(folder, url):
folder_watch(folder)
try:
item_dl(subsources, folder)
if log_enabled:
logging.info(f"Downloaded {url}{href}")
sleep(2)
except HTTPSConnectionPool as ConnErr:
print(f"Got Error {ConnErr}, pipes must be clogged lulz")
if log_enabled:
logging.error(f"Got Error {ConnErr}, pipes must be clogged lulz")
else:
print(f"Got Error {ConnErr}, pipes must be clogged lulz")
except Exception as e:
print(f"Houston, we had a problem with {url} and {folder}: \n{e}")
@ -206,6 +215,20 @@ def dl_threads(folder, url):
args = getArgs()
folder = args.folder
keyword = args.keyword
if args.logfile:
logfile = args.logfile
#Creating Logfile
logging.basicConfig(\
format='%(asctime)s %(levelname)-8s %(message)s',\
filename=f"{path[0]}/{args.logfile}",\
level = logging.DEBUG,\
datefmt='%Y-%m-%d %H:%M:%S'\
)
log_enabled = True
else:
log_enabled = False
url = args.url
soup = html_get(url)
@ -223,7 +246,7 @@ print("Lurking...")
if not args.constant:
for href in hrefs:
folder_watch(folder)
dl_threads(folder, url)
dl_threads(folder, url, log_enabled)
else:
while True:
folder_watch(folder)