欢迎访问 生活随笔!

生活随笔

当前位置: 首页 > 编程资源 > 编程问答 >内容正文

编程问答

脚本检测CDN节点资源是否与源站资源一致

发布时间:2024/9/5 编程问答 54 豆豆
生活随笔 收集整理的这篇文章主要介绍了 脚本检测CDN节点资源是否与源站资源一致 小编觉得挺不错的,现在分享给大家,帮大家做个参考.

需求:

  1、所有要检测的资源url放到一个单独文件中

  2、检测cdn节点资源大小与源站文件大小是否一致

  3、随机抽查几个资源,检查md5sum是否一致

  4、使用多线程,可配置线程数

 

代码目录:

hexm:Hexm hexm$ tree ./checkcdn
./checkcdn
├── README.TXT
├── check.py            # 主程序
├── conf
│   └── url.txt         # 配置文件
├── lib
│   ├── __init__.py
│   ├── common.py
│   └── threadpool.py   # 线程池
└── tmp
    ├── cdn             # 存放从CDN节点下载的资源
    └── origin          # 存放从源站下载的资源

 

README.TXT

依赖:requests
兼容性:兼容Python3以及Python2.7
使用方法:

usage: check.py [-h] [-t THREADS] [-c COUNTS]

optional arguments:
  -h, --help            show this help message and exit
  -t THREADS, --threads THREADS
                        开启多少线程,默认5个
  -c COUNTS, --counts COUNTS
                        检测多少个包的md5值,默认3个

conf/url.txt

http://xxx_1020101.apk
http://xxx_1020102.apk
http://xxx_1020103.apk
http://xxx_1020104.apk

check.py

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# File Name    : check.py
# Author       : hexm
# Mail         : xiaoming.unix@gmail.com
# Created Time : 2017-03-24 10:03
"""Check that resources served by a CDN node match the origin server.

Every URL listed in ``conf/url.txt`` is requested twice with HEAD: once
through ``PROXIES`` (treated as the origin) and once directly (treated as the
CDN node), and the reported ``Content-Length`` values are compared.  A random
sample of the URLs is then downloaded from both sides and compared by MD5.
"""

import argparse
import os
import random
import sys

import requests

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Make the sibling ``lib`` package importable when run as a script.
sys.path.append(BASE_DIR)

# Proxy used to reach the origin server.
PROXIES = {"http": "http://183.136.135.191:80", }
# File holding the URLs to check, one per line.
CONFIG = BASE_DIR + '/conf/url.txt'
# Temporary directory for files downloaded from the CDN node.
CDNTEMPDIR = BASE_DIR + '/tmp/cdn/'
# Temporary directory for files downloaded from the origin server.
ORIGINTEMPDIR = BASE_DIR + '/tmp/origin/'
# Seconds to wait on a HEAD request before giving up, so that one dead host
# cannot block a worker thread forever.
REQUEST_TIMEOUT = 60

from lib.threadpool import ThreadPool
from lib.common import isdir, download, getfilemd5


def callback(status, result):
    """Placeholder pool callback; only useful when a task returns a value.

    :param status: True if the task ran without raising, otherwise False
    :param result: value returned by the task
    """
    pass


def checkstatus(url):
    """Compare the origin and CDN ``Content-Length`` of *url* via HEAD.

    The result is printed; nothing is returned.  Request failures are
    reported here because this function runs inside the thread pool without a
    callback, where an escaping exception would be discarded silently.

    :param url: resource URL to check
    :return: None
    """
    try:
        r1 = requests.head(url, proxies=PROXIES, timeout=REQUEST_TIMEOUT)
        r2 = requests.head(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        print("%s \033[0;31m请求失败\033[0m: %s" % (url, e))
        return

    if r1.status_code != 200 or r2.status_code != 200:
        print("%s 源站和CDN节点状态码\033[0;31m异常\033[0m,源站状态码为%s,CDN节点状态码为%s"
              % (url, r1.status_code, r2.status_code))
        return

    # .get() rather than [] so that a response without the header (e.g.
    # chunked transfer) is reported instead of raising KeyError.
    origin_size = r1.headers.get('Content-Length')
    cdn_size = r2.headers.get('Content-Length')
    if origin_size is None or cdn_size is None:
        print("%s \033[0;31m缺少Content-Length\033[0m, 源站文件大小为%s,CDN节点文件大小为%s"
              % (url, origin_size, cdn_size))
    elif origin_size == cdn_size:
        print("%s 源站和CDN节点资源\033[0;32m一致\033[0m, 源站文件大小为%s,CDN节点文件大小为%s"
              % (url, origin_size, cdn_size))
    else:
        print("%s 源站和CDN节点资源\033[0;31m不一致\033[0m, 源站文件大小为%s,CDN节点文件大小为%s"
              % (url, origin_size, cdn_size))


def _remove_quietly(path):
    """Delete *path*, ignoring the error if it does not exist."""
    try:
        os.remove(path)
    except OSError:
        pass


def checkmd5(url, cdnTempDir, originTempDir):
    """Download *url* from the origin and the CDN node and compare MD5 sums.

    The outcome is printed; nothing is returned.  Both temporary files are
    removed afterwards regardless of the outcome.

    :param url: resource URL to check
    :param cdnTempDir: directory for the file downloaded from the CDN node
    :param originTempDir: directory for the file downloaded from the origin
    :return: None
    """
    filename = url.split('/')[-1]
    tempCdnFile = cdnTempDir + filename
    tempOriginFile = originTempDir + filename

    try:
        origin_status = download(url, tempOriginFile, proxies=PROXIES)
        if origin_status is None:
            print("%s \033[0;31m下载失败\033[0m" % url)
            return
        if origin_status != 200:
            print("%s \033[0;31m状态码异常\033[0m校验失败" % url)
            return

        # Only fetch from the CDN once the origin copy is known to be good.
        cdn_status = download(url, tempCdnFile)
        if cdn_status is None:
            print("%s \033[0;31m下载失败\033[0m" % url)
        elif cdn_status != 200:
            print("%s \033[0;31m状态码异常\033[0m校验失败" % url)
        elif getfilemd5(tempCdnFile) == getfilemd5(tempOriginFile):
            print("%s 源站和cdn节点资源md5值\033[0;32m一致\033[0m," % url)
        else:
            print("%s 源站和cdn节点资源md5值\033[0;31m不一致\033[0m" % url)
    finally:
        # Remove each file independently: the CDN copy may not exist if the
        # origin download failed, and that must not keep the other file alive.
        _remove_quietly(tempOriginFile)
        _remove_quietly(tempCdnFile)


def parse_args():
    """Parse the command-line options.

    :return: argparse namespace with integer ``threads`` and ``counts``
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--threads', type=int, default=5,
                        help='开启多少线程,默认5个')
    parser.add_argument('-c', '--counts', type=int, default=3,
                        help='检测多少个包的md5值,默认3个')
    return parser.parse_args()


def main():
    """Run the size check on every URL and the MD5 check on a random sample."""
    args = parse_args()

    for directory in (CDNTEMPDIR, ORIGINTEMPDIR):
        if not isdir(directory):
            os.makedirs(directory)

    # Skip blank lines so that a trailing newline in the file does not turn
    # into a request for an empty URL.
    with open(CONFIG, mode='r') as f:
        urls = [line.strip() for line in f if line.strip()]

    pool = ThreadPool(args.threads)

    # Size check for every URL.
    for url in urls:
        pool.run(checkstatus, (url,), callback=None)

    # MD5 check for a random sample; random.sample raises ValueError when
    # asked for more items than exist (or a negative count), so clamp it.
    sample_size = max(0, min(args.counts, len(urls)))
    for randurl in random.sample(urls, sample_size):
        pool.run(checkmd5, (randurl, CDNTEMPDIR, ORIGINTEMPDIR,), callback=None)

    pool.close()


if __name__ == "__main__":
    main()

lib/common.py

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# File Name    : common.py
# Author       : hexm
# Mail         : xiaoming.unix@gmail.com
# Created Time : 2017-03-24 10:03
"""File and HTTP helper functions used by the CDN consistency checker."""

import hashlib
import os

import requests


def getfilesize(path):
    """Return the size of a file in bytes.

    :param path: file path
    :return: size in bytes, as reported by ``os.path.getsize``
    """
    return os.path.getsize(path)


def isfile(path):
    """Tell whether *path* is an existing regular file.

    :param path: file path
    :return: True if it is a file, otherwise None
    """
    return True if os.path.isfile(path) else None


def isdir(path):
    """Tell whether *path* is an existing directory.

    :param path: directory path
    :return: True if it is a directory, otherwise None
    """
    return True if os.path.isdir(path) else None


def getstatus(url, proxies=None, timeout=60):
    """Return the HTTP status code of a HEAD request for *url*.

    :param url: URL to request
    :param proxies: optional requests-style proxy mapping
    :param timeout: seconds to wait for the server before giving up
    :return: integer status code
    """
    # ``proxies`` must be passed by keyword: requests.head() accepts only the
    # URL positionally, so passing it positionally raises TypeError.
    return requests.head(url, proxies=proxies, timeout=timeout).status_code


def download(url, path, proxies=None):
    """Download *url* to *path* and return the HTTP status code.

    The body is only written when the status is 200.  If the response carries
    a ``Content-Length`` header, the size of the written file must match it.

    :param url: URL to download
    :param path: destination file path
    :param proxies: optional requests-style proxy mapping
    :return: the status code; None if the request or the write failed, or if
             the downloaded size does not match ``Content-Length``
    """
    try:
        response = requests.get(url, proxies=proxies, stream=True, timeout=60)
    except requests.RequestException:
        return None

    try:
        status = response.status_code
        if status != 200:
            # Report the status as-is; error responses need not carry a
            # Content-Length header, so do not look at it here.
            return status

        expected_size = response.headers.get('Content-Length')
        with open(path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)

        # A short file means the transfer was cut off.
        if expected_size is not None and int(expected_size) != getfilesize(path):
            return None
        return status
    except (requests.RequestException, IOError, OSError, ValueError):
        return None
    finally:
        # With stream=True the connection is held until the response is
        # closed, so release it explicitly on every path.
        response.close()


def getfilemd5(path):
    """Return the hexadecimal MD5 digest of a file.

    :param path: file path
    :return: hex digest string, or None if *path* is not a file
    """
    if not isfile(path):
        return None

    md5obj = hashlib.md5()
    with open(path, 'rb') as f:
        # Read in fixed-size blocks so large files are not loaded into memory.
        for buf in iter(lambda: f.read(8192), b''):
            md5obj.update(buf)
    return md5obj.hexdigest()


if __name__ == "__main__":
    pass

lib/threadpool.py

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# File Name    : threadpool.py
# Author       : hexm
# Mail         : xiaoming.unix@gmail.com
# Created Time : 2017-03-23 20:03
"""A small thread pool that reuses worker threads and starts them on demand."""

import contextlib
import threading
import time

try:
    import queue
except ImportError:  # Python 2.7
    import Queue as queue

StopEvent = object()  # sentinel placed on the queue to tell a worker to exit


class ThreadPool(object):
    """Thread pool with lazily created, reusable workers.

    1. A worker that finishes a task is not destroyed; it is marked idle and
       picks up the next task from the queue.
    2. A new worker is started only when a task is submitted, no worker is
       idle, and fewer than ``max_num`` workers exist.
    """

    def __init__(self, max_num, max_task_num=None):
        """
        :param max_num: maximum number of worker threads
        :param max_task_num: maximum number of queued tasks; None or 0 means
                             unbounded
        """
        if max_task_num:
            self.q = queue.Queue(max_task_num)
        else:
            self.q = queue.Queue()
        self.max_num = max_num
        self.cancel = False      # set by close(): accept no further tasks
        self.terminal = False    # set by terminate(): stop after current task
        self.generate_list = []  # worker threads that have been created
        self.free_list = []      # worker threads currently waiting for a task
        # Guards generate_list so that creating, counting and removing
        # workers cannot interleave.
        self._lock = threading.Lock()

    def run(self, func, args, callback=None):
        """Submit one task to the pool.

        :param func: task function
        :param args: tuple of positional arguments for *func*
        :param callback: optional function called after the task as
                         ``callback(success, result)``, where *success* is
                         False and *result* is None if the task raised
        :return: None; the task is dropped if close() was already called
        """
        if self.cancel:
            return
        # Start a worker only if nobody is idle and the limit is not reached.
        if not self.free_list and len(self.generate_list) < self.max_num:
            self.generate_thread()
        self.q.put((func, args, callback,))

    def generate_thread(self):
        """Create, register and start one worker thread."""
        worker = threading.Thread(target=self.call)
        with self._lock:
            # Register before start(), in the submitting thread.  If the
            # worker registered itself, run() and close() could read the list
            # before it got scheduled, exceeding max_num or sending too few
            # stop sentinels.
            self.generate_list.append(worker)
            try:
                worker.start()
            except Exception:
                self.generate_list.remove(worker)
                raise

    def call(self):
        """Worker loop: take tasks from the queue and run them until stopped."""
        current_thread = threading.current_thread()
        try:
            event = self.q.get()  # blocks until a task or sentinel arrives
            while event is not StopEvent:
                func, arguments, callback = event
                try:
                    result = func(*arguments)
                    success = True
                except Exception:
                    # Failure is reported to the callback, not raised, so one
                    # bad task cannot kill the worker.
                    success = False
                    result = None

                if callback is not None:
                    try:
                        callback(success, result)
                    except Exception:
                        # Deliberately best-effort: a faulty callback must
                        # not take the worker down.
                        pass

                # The worker counts as idle while it waits for its next item.
                with self.worker_state(self.free_list, current_thread):
                    if self.terminal:
                        event = StopEvent
                    else:
                        event = self.q.get()
        finally:
            with self._lock:
                if current_thread in self.generate_list:
                    self.generate_list.remove(current_thread)

    def close(self):
        """Stop accepting tasks and let workers exit once the queue drains.

        Does not wait for the workers; already queued tasks still run.
        """
        self.cancel = True
        with self._lock:
            worker_count = len(self.generate_list)
        # One sentinel per worker, queued behind all pending tasks.
        for _ in range(worker_count):
            self.q.put(StopEvent)

    def terminate(self):
        """Stop all workers after their current task and discard the rest.

        Blocks until every worker thread has exited.
        """
        self.terminal = True
        with self._lock:
            workers = list(self.generate_list)
        # Busy workers see ``terminal`` after their task; the sentinels wake
        # the ones blocked on an empty queue.
        for _ in workers:
            self.q.put(StopEvent)
        current_thread = threading.current_thread()
        for worker in workers:
            if worker is not current_thread:  # a thread cannot join itself
                worker.join()
        # Drop tasks and sentinels that nobody consumed.
        with self.q.mutex:
            self.q.queue.clear()

    @contextlib.contextmanager
    def worker_state(self, state_list, worker_thread):
        """Keep *worker_thread* in *state_list* for the duration of the block.

        :param state_list: list tracking threads in a given state
        :param worker_thread: thread to record
        """
        state_list.append(worker_thread)
        try:
            yield
        finally:
            state_list.remove(worker_thread)


# Usage example
if __name__ == "__main__":
    pool = ThreadPool(5)  # at most 5 worker threads

    def callback(status, result):
        pass

    def action(i):
        time.sleep(1)
        print(i)

    for i in range(30):  # 30 tasks in total
        ret = pool.run(action, (i,), callback=None)
    pool.close()

 

例子:

 

转载于:https://www.cnblogs.com/xiaoming279/p/6626768.html

总结

以上是生活随笔为你收集整理的脚本检测CDN节点资源是否与源站资源一致的全部内容,希望文章能够帮你解决所遇到的问题。

如果觉得生活随笔网站内容还不错,欢迎将生活随笔推荐给好友。