Update '4chanthreadfinder.py'
Added logging
This commit is contained in:
parent
52fdd4f4b1
commit
77c20d67f1
@ -7,9 +7,11 @@ from bs4 import BeautifulSoup
|
|||||||
import mechanicalsoup
|
import mechanicalsoup
|
||||||
import wget
|
import wget
|
||||||
from os import mkdir, listdir
|
from os import mkdir, listdir
|
||||||
|
from sys import path
|
||||||
from re import findall
|
from re import findall
|
||||||
from time import sleep
|
from time import sleep
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
import logging
|
||||||
|
|
||||||
'''
|
'''
|
||||||
############## 4chan thread scraper ################
|
############## 4chan thread scraper ################
|
||||||
@ -39,6 +41,7 @@ def getArgs():
|
|||||||
parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True)
|
parser.add_argument("-u", "--url", help = "URL of the board. MUST INCLUDE FINAL /, ex : https://boards.4chan.org/b/", action="store", type=str, required=True)
|
||||||
parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
|
parser.add_argument("-f", "--folder", help = "Folder in which downloads will go, ex: ./downloads", action="store", type=str, required=True)
|
||||||
parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'. Argument can be used multiple times", action='append', required=True)
|
parser.add_argument("-k", "--keyword", help = "keyword or phrase to look for in the threads, ex : 'cute thread'. Argument can be used multiple times", action='append', required=True)
|
||||||
|
parser.add_argument("-l", "--logfile", help="Name of the logfile. Please provide a name that is not already in use.", type=str, required=False)
|
||||||
parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
|
parser.add_argument("-c", "--constant", help = "Constantly download", action="store_true")
|
||||||
|
|
||||||
#Creating the args object
|
#Creating the args object
|
||||||
@ -170,12 +173,13 @@ def folder_watch(folder):
|
|||||||
folder_content += i
|
folder_content += i
|
||||||
|
|
||||||
|
|
||||||
def dl_threads(folder, url):
|
def dl_threads(folder, url, log_enabled):
|
||||||
'''
|
'''
|
||||||
Constantly download...
|
Constantly download...
|
||||||
Args:
|
Args:
|
||||||
- folder: folder to dl into
|
- folder: folder to dl into
|
||||||
- url : board to watch
|
- url : board to watch
|
||||||
|
- log_enabled : True to record downloads and connection errors with the logging module; when False, connection errors are printed instead
|
||||||
'''
|
'''
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -194,9 +198,14 @@ def dl_threads(folder, url):
|
|||||||
folder_watch(folder)
|
folder_watch(folder)
|
||||||
try:
|
try:
|
||||||
item_dl(subsources, folder)
|
item_dl(subsources, folder)
|
||||||
|
if log_enabled:
|
||||||
|
logging.info(f"Downloaded {url}{href}")
|
||||||
|
sleep(2)
|
||||||
except HTTPSConnectionPool as ConnErr:
|
except HTTPSConnectionPool as ConnErr:
|
||||||
print(f"Got Error {ConErr}, pipes must be clogged lulz")
|
if log_enabled:
|
||||||
|
logging.error(f"Got Error {ConErr}, pipes must be clogged lulz")
|
||||||
|
else:
|
||||||
|
print(f"Got Error {ConErr}, pipes must be clogged lulz")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Houston, we had a problem with {url} and {folder}: \n{e}")
|
print(f"Houston, we had a problem with {url} and {folder}: \n{e}")
|
||||||
|
|
||||||
@ -206,6 +215,20 @@ def dl_threads(folder, url):
|
|||||||
args = getArgs()
|
args = getArgs()
|
||||||
folder = args.folder
|
folder = args.folder
|
||||||
keyword = args.keyword
|
keyword = args.keyword
|
||||||
|
if args.logfile:
|
||||||
|
logfile = args.logfile
|
||||||
|
|
||||||
|
#Creating Logfile
|
||||||
|
logging.basicConfig(\
|
||||||
|
format='%(asctime)s %(levelname)-8s %(message)s',\
|
||||||
|
filename=f"{path[0]}/{args.logfile}",\
|
||||||
|
level = logging.DEBUG,\
|
||||||
|
datefmt='%Y-%m-%d %H:%M:%S'\
|
||||||
|
)
|
||||||
|
log_enabled = True
|
||||||
|
else:
|
||||||
|
log_enabled = False
|
||||||
|
|
||||||
|
|
||||||
url = args.url
|
url = args.url
|
||||||
soup = html_get(url)
|
soup = html_get(url)
|
||||||
@ -223,7 +246,7 @@ print("Lurking...")
|
|||||||
if not args.constant:
|
if not args.constant:
|
||||||
for href in hrefs:
|
for href in hrefs:
|
||||||
folder_watch(folder)
|
folder_watch(folder)
|
||||||
dl_threads(folder, url)
|
dl_threads(folder, url, log_enabled)
|
||||||
else:
|
else:
|
||||||
while True:
|
while True:
|
||||||
folder_watch(folder)
|
folder_watch(folder)
|
||||||
|
Reference in New Issue
Block a user