Dédup fonctionne + anti cporn
This commit is contained in:
parent
a117554a04
commit
9f7a7f14f0
BIN
1612736548910.pngyc4rlphz.tmp
Normal file
BIN
1612736548910.pngyc4rlphz.tmp
Normal file
Binary file not shown.
After Width: | Height: | Size: 88 KiB |
@ -52,7 +52,7 @@ def thread_finder(homepage, keyword):
|
|||||||
for thread in threads:
|
for thread in threads:
|
||||||
texts = thread.findAll('blockquote', {'class' : 'postMessage'})
|
texts = thread.findAll('blockquote', {'class' : 'postMessage'})
|
||||||
for text in texts:
|
for text in texts:
|
||||||
if keyword.lower() in text.text.lower():
|
if keyword.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower():
|
||||||
links = thread.findAll('a', {'title': 'Reply to this post'})
|
links = thread.findAll('a', {'title': 'Reply to this post'})
|
||||||
for link in links:
|
for link in links:
|
||||||
hrefs.append(f"{link['href']}")
|
hrefs.append(f"{link['href']}")
|
||||||
@ -116,16 +116,13 @@ def item_dl(sources, dlfolder):
|
|||||||
#Deduplicating
|
#Deduplicating
|
||||||
imagenames = []
|
imagenames = []
|
||||||
dir_content = listdir(dlfolder)
|
dir_content = listdir(dlfolder)
|
||||||
for index,source in enumerate(sources):
|
|
||||||
imagename = findall(r"[^\/]*$", source)[0]
|
|
||||||
if imagename or f"{imagename} (1).jpg" in dir_content:
|
|
||||||
sources.pop(index)
|
|
||||||
print(f"Found duplicate {imagename}")
|
|
||||||
|
|
||||||
for source in sources:
|
for source in sources:
|
||||||
fullsource = "http://" + source
|
fullsource = "http://" + source
|
||||||
name = wget.download(fullsource, out=dlfolder)
|
imagename = findall(r"[^\/]*$", source)[0]
|
||||||
print(f"{name} downloaded")
|
if imagename[:-4] not in str(dir_content):
|
||||||
|
name = wget.download(fullsource, out=dlfolder)
|
||||||
|
print(f"{name} downloaded")
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user