diff --git a/1612736548910.pngyc4rlphz.tmp b/1612736548910.pngyc4rlphz.tmp new file mode 100644 index 0000000..43c4ff3 Binary files /dev/null and b/1612736548910.pngyc4rlphz.tmp differ diff --git a/4chanthreadfinder.py b/4chanthreadfinder.py index 640c47b..5fdf4d4 100755 --- a/4chanthreadfinder.py +++ b/4chanthreadfinder.py @@ -52,7 +52,7 @@ def thread_finder(homepage, keyword): for thread in threads: texts = thread.findAll('blockquote', {'class' : 'postMessage'}) for text in texts: - if keyword.lower() in text.text.lower(): + if keyword.lower() in text.text.lower() and "loli" not in text.text.lower() and "shota" not in text.text.lower(): links = thread.findAll('a', {'title': 'Reply to this post'}) for link in links: hrefs.append(f"{link['href']}") @@ -116,16 +116,13 @@ def item_dl(sources, dlfolder): #Deduplicating imagenames = [] dir_content = listdir(dlfolder) - for index,source in enumerate(sources): - imagename = findall(r"[^\/]*$", source)[0] - if imagename or f"{imagename} (1).jpg" in dir_content: - sources.pop(index) - print(f"Found duplicate {imagename}") for source in sources: fullsource = "http://" + source - name = wget.download(fullsource, out=dlfolder) - print(f"{name} downloaded") + imagename = findall(r"[^\/]*$", source)[0] + if imagename[:-4] not in str(dir_content): + name = wget.download(fullsource, out=dlfolder) + print(f"{name} downloaded") return True