# Various Blog
#
# 抓图自动机 (Automatic Picture Grabber)
#
# 2018-11-25 14:56:58


from urllib import request
import os
import time
import hashlib
# Running count of download attempts made by the fetch loop.
ftcnt = 0

# URL every picture is requested from.
linkn = 'https://api.3ewl.cc/acg/img.php'

# Pictures are saved into the directory the script was started in.
path = os.getcwd()
print('Current Work Path is:', path)

def filecount():
    """Return the number of entries in the current working directory.

    The count is non-recursive and includes both files and
    sub-directories, i.e. every name ``os.listdir`` reports.

    This replaces a Windows-only ``dir /B | find /V /C ""`` shell
    pipeline, so it works on any platform and spawns no subprocess.
    Unlike ``dir /B``, hidden entries are counted as well.

    Returns:
        int: Number of directory entries; ``0`` for an empty directory.
    """
    return len(os.listdir(os.getcwd()))

def md5sum(filename):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in fixed-size binary chunks so large files are not
    loaded into memory at once.

    Args:
        filename: Path of the file to hash.

    Returns:
        str: 32-character lowercase hexadecimal MD5 digest.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    md5 = hashlib.md5()
    # The context manager closes the handle even if a read fails.
    with open(filename, 'rb') as f:
        # iter() with a sentinel stops at the empty bytes returned on EOF.
        for fb in iter(lambda: f.read(8096), b''):
            md5.update(fb)
    return md5.hexdigest()

def delfile():
    """Delete files whose content duplicates an earlier file under the cwd.

    Walks the current working directory recursively, hashes every file
    with ``md5sum`` and removes any file whose digest has already been
    seen. The first file encountered with a given digest is kept.

    Returns:
        None

    Raises:
        OSError: If a file cannot be read or removed.
    """
    seen_md5 = set()
    for dirpath, _dirnames, filenames in os.walk(os.getcwd()):
        for name in filenames:
            # os.walk yields bare file names; join with the directory so
            # files in sub-directories are hashed and removed correctly.
            full_path = os.path.join(dirpath, name)
            digest = md5sum(full_path)
            if digest in seen_md5:
                os.remove(full_path)
            else:
                seen_md5.add(digest)

# Entry count recorded at the end of the last cleanup pass; combined with
# ftcnt it decides when the next cleanup pass runs.
oldf = 0
while True:
    # True on the very first iteration (0 + 0), then whenever the recorded
    # entry count plus the fetch counter is a multiple of 100.
    if (oldf + ftcnt) % 100 == 0:
        print('Cleaning Double Files.')
        before = filecount()
        print(before, 'Files Before Removal.')
        delfile()
        # Count once after removal and reuse it for both messages and for
        # the next trigger check.
        after = filecount()
        print(after, 'Files After Removal.')
        print('Deleted ', before - after, 'Files.')
        oldf = after

    ftcnt = ftcnt + 1
    # Progress is reported only on every 40th fetch to keep output short.
    if ftcnt % 40 == 0:
        print('Fetching',ftcnt,'th Picture from ',linkn,'...')
    # The current timestamp is used as the file name; os.path.join picks
    # the right separator for the platform instead of a hard-coded '\\'.
    request.urlretrieve(linkn, os.path.join(path, str(time.time()) + '.jpg'))