当前位置:
首页 >
爬虫——————爬取中金所,深交所,上交所期权数据
发布时间:2023/12/19
53
豆豆
生活随笔
收集整理的这篇文章主要介绍了
爬虫——————爬取中金所,深交所,上交所期权数据
小编觉得挺不错的,现在分享给大家,帮大家做个参考.
先从深交所开始,直接上源码:
"""Download the Shenzhen Stock Exchange (SZSE) daily option report.

For every trading day on or after 2019-01-01 the script requests the xlsx
report from www.szse.cn, tags its rows with the trading day and finally
writes all rows to ``szse.csv`` in the current directory.
"""
import csv
import datetime
import gzip
import random
import time
from contextlib import closing
from io import BytesIO
from urllib import request
from urllib.request import urlopen

import akshare as ak
import pandas as pd
import requests
from bs4 import BeautifulSoup
from lxml import etree

# Report endpoint; ``%(j)s`` is the trading day formatted as YYYY-MM-DD.
SZSE_REPORT_URL = (
    'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx'
    '&CATALOGID=option_hyfxzb&TABKEY=tab1'
    '&txtSearchDate=%(j)s&random=0.5379373345285146'
)

# Set the dates to fetch: the trading calendar from 2019-01-01 onwards.
date = ak.tool_trade_date_hist_sina()
# Normalise to ISO strings first, so the comparison below and the URL
# formatting work whether akshare returns strings or date objects.
date['trade_date'] = pd.to_datetime(date['trade_date']).dt.strftime('%Y-%m-%d')
date = date.loc[date['trade_date'] >= '2019-01-01']

daily_frames = []
for j in date['trade_date']:
    print(j)
    url = SZSE_REPORT_URL % {'j': j}
    print(url)
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Report the failed day and keep going instead of losing the whole run.
        print('skipping %s: %s' % (j, exc))
        continue

    # The response body is a binary xlsx workbook, which is why printing it
    # looks like garbage. Parse it straight from memory instead of writing it
    # to disk first: the earlier version appended every day's workbook to the
    # same file ('ab' mode), so the file on disk was never a clean workbook.
    df = pd.read_excel(BytesIO(response.content))
    df['trade_date'] = j
    daily_frames.append(df)

# One concat at the end replaces the per-day DataFrame.append calls
# (DataFrame.append was removed in pandas 2.0).
df1 = pd.concat(daily_frames) if daily_frames else pd.DataFrame()
df1.to_csv('szse.csv')

# Next section: scraping the Shanghai Stock Exchange (SSE).
"""Download the Shanghai Stock Exchange (SSE) daily option risk file.

For every trading day on or after 2019-01-01 the script streams the
GBK-encoded CSV served by query.sse.com.cn and finally writes all rows,
in download order, to ``sse.csv`` in the current directory.
"""
import csv
import time
from contextlib import closing
from urllib.request import urlopen

import akshare as ak
import pandas as pd
import requests

date = ak.tool_trade_date_hist_sina()
# Normalise to ISO strings first, so the comparison below and the URL
# formatting work whether akshare returns strings or date objects.
date['trade_date'] = pd.to_datetime(date['trade_date']).dt.strftime('%Y-%m-%d')
date = date.loc[date['trade_date'] >= '2019-01-01']

# Example request:
# //query.sse.com.cn/derivative/downloadRisk.do?trade_date=20201207&productType=0
all_rows = []
for j in date['trade_date']:
    url = (
        'http://query.sse.com.cn/derivative/downloadRisk.do'
        '?trade_date=%(day)s&productType=0' % {'day': j.replace('-', '')}
    )
    day_rows = []
    try:
        # Read the data as a stream and decode it line by line.
        with closing(requests.get(url, stream=True, timeout=30)) as r:
            r.raise_for_status()
            decoded_lines = (line.decode('gbk') for line in r.iter_lines())
            # Use the default quote character: a quotechar equal to the
            # delimiter is rejected by newer Python versions.
            for row in csv.reader(decoded_lines, delimiter=','):
                print(row)
                if row:  # blank lines carry no data
                    day_rows.append(row)
    except requests.RequestException as exc:
        # Report the failed day and keep going instead of losing the whole run.
        print('skipping %s: %s' % (j, exc))
        continue
    # Only keep a day's rows once the whole download succeeded.
    all_rows.extend(day_rows)

# Build the frame once; shorter rows are padded with missing values.
# The CSV index column is a running row counter.
df1 = pd.DataFrame(all_rows)
df1.to_csv('sse.csv')

# Next section: scraping the China Financial Futures Exchange (CFFEX).
"""Download the China Financial Futures Exchange (CFFEX) daily statistics.

For every trading day on or after 2019-01-01 the script fetches that day's
``index.xml`` from www.cffex.com.cn, turns every ``<dailydata>`` element
into one row and finally writes all rows to ``cffex.csv`` in the current
directory.
"""
import datetime
import time

import akshare as ak
import pandas as pd
import requests
from lxml import etree

# Child elements of <dailydata> that become CSV columns, in output order.
CFFEX_FIELDS = (
    'instrumentid',
    'tradingday',
    'openprice',
    'highestprice',
    'lowestprice',
    'closeprice',
    'preopeninterest',
    'openinterest',
    'presettlementprice',
    'settlementpriceif',
    'settlementprice',
    'volume',
    'turnover',
    'productid',
    'delta',
    'expiredate',
)

date = ak.tool_trade_date_hist_sina()
# Normalise to ISO strings first, so the comparison below and strptime
# work whether akshare returns strings or date objects.
date['trade_date'] = pd.to_datetime(date['trade_date']).dt.strftime('%Y-%m-%d')
date = date.loc[date['trade_date'] >= '2019-01-01']

daily_frames = []
for j in date['trade_date']:
    day = datetime.datetime.strptime(j, '%Y-%m-%d')
    url = 'http://www.cffex.com.cn/sj/hqsj/rtj/%(YM)s/%(D)s/index.xml?id=39' % {
        'YM': day.strftime('%Y%m'),
        'D': day.strftime('%d'),
    }
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Report the failed day and keep going instead of losing the whole run.
        print('skipping %s: %s' % (j, exc))
        continue
    if not response.content.strip():
        continue  # nothing to parse for this day

    # The HTML parser is kept from the original script; the lowercase tag
    # names used below match what it produces.
    p = etree.HTML(response.content)
    nodes = p.xpath('//dailydata') if p is not None else []

    # Walk the elements themselves rather than addressing them by position.
    # The positional version stopped one short and so dropped the last
    # record, and its 'expiredate' lookup used a literal "i" instead of the
    # record number. A missing or empty field is stored as 0, as before.
    records = [
        {field: node.findtext(field) or 0 for field in CFFEX_FIELDS}
        for node in nodes
    ]
    if not records:
        continue

    # Rows are labelled from 1 within each day, like the original df.loc[i, ...].
    df = pd.DataFrame(
        records,
        index=range(1, len(records) + 1),
        columns=list(CFFEX_FIELDS),
    )
    daily_frames.append(df)

# One concat at the end replaces the per-day DataFrame.append calls
# (DataFrame.append was removed in pandas 2.0).
df1 = pd.concat(daily_frames) if daily_frames else pd.DataFrame()
df1.to_csv('cffex.csv')

# The three scripts above scrape option data from the three exchanges. They
# can be used as-is or adapted to store the results in a database instead.
总结
以上是生活随笔为你收集整理的爬虫——————爬取中金所,深交所,上交所期权数据的全部内容,希望文章能够帮你解决所遇到的问题。
- 上一篇: JAVA中protected的作用
- 下一篇: IDEA常用快捷键【win-mac对比】