Dédup fonctionne + anti cporn

This commit is contained in:
Justine 2021-02-08 00:28:26 +01:00
parent a117554a04
commit 9f7a7f14f0
2 changed files with 5 additions and 8 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

View File

@@ -52,7 +52,7 @@ def thread_finder(homepage, keyword):
for thread in threads:
texts = thread.findAll('blockquote', {'class' : 'postMessage'})
for text in texts:
if keyword.lower() in text.text.lower():
if keyword.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower():
links = thread.findAll('a', {'title': 'Reply to this post'})
for link in links:
hrefs.append(f"{link['href']}")
@@ -116,16 +116,13 @@ def item_dl(sources, dlfolder):
#Deduplicating
imagenames = []
dir_content = listdir(dlfolder)
for index,source in enumerate(sources):
imagename = findall(r"[^\/]*$", source)[0]
if imagename or f"{imagename} (1).jpg" in dir_content:
sources.pop(index)
print(f"Found duplicate {imagename}")
for source in sources:
fullsource = "http://" + source
name = wget.download(fullsource, out=dlfolder)
print(f"{name} downloaded")
imagename = findall(r"[^\/]*$", source)[0]
if imagename[:-4] not in str(dir_content):
name = wget.download(fullsource, out=dlfolder)
print(f"{name} downloaded")
return True