import pymongo
from pymongo import MongoClient
import json
import tushare as ts
import pandas as pd
import logging
import datetime

client = MongoClient('127.0.0.1', port=27017)
db = client.tushare_database
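
# Bars are cached in the 'hist_data' collection, one document per
# (stock, date, ktype) triple; the helpers below call tushare only for
# dates that are missing from that cache.
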
def clear_database():
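    """Drop the cached 'hist_data' collection."""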
    db.drop_collection('hist_data')

def get_hist_data_day(stock,start,end):
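    """Return daily bars for `stock` between `start` and `end` (YYYY-MM-DD),
    fetching missing ranges from tushare and caching them in MongoDB."""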
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    logger.debug("CALL get_hist_data_day <%s,%s,%s>",stock,start,end)
    collection = db.hist_data
    mx_time = None
    mn_time = None
    # Find the earliest and latest cached dates for this stock's daily bars.
    for bar in collection.find({'stock':stock,'ktype':'D'}).sort("date",pymongo.ASCENDING).limit(1):
        mn_time = bar['date']
    for bar in collection.find({'stock':stock,'ktype':'D'}).sort("date",pymongo.DESCENDING).limit(1):
        mx_time = bar['date']
    # If the cache ends before the requested window, extend the window back
    # to mx_time so the gap between the two is fetched as well.
    if mx_time is not None and mx_time < start:
        start = mx_time
    start_time = datetime.datetime.strptime(start,'%Y-%m-%d')
    end_time = datetime.datetime.strptime(end,'%Y-%m-%d')
    # If the database is non-empty, build an empty DataFrame whose columns
    # match the cached documents, then fill it below.
    www = collection.find_one({'stock':stock,'ktype':'D'})
    if www is not None:
        df = pd.DataFrame({w:[] for w in www.keys() if w != '_id'})
        current_time = end_time
    else:
        # Empty cache: seed the DataFrame with one fetch so it has columns.
        df = ts.get_hist_data(stock,end,end)
        current_time = end_time - datetime.timedelta(days=1)
    # Walk the time range in 20-day chunks and fetch any chunk that is not
    # fully covered by the cached [mn_time, mx_time] range.
    while current_time >= start_time:
        prev_time = current_time - datetime.timedelta(days=19)
        if mn_time is None or not (prev_time.strftime('%Y-%m-%d') >= mn_time
                                   and current_time.strftime('%Y-%m-%d') <= mx_time):
            dff = ts.get_hist_data(stock,
                                   prev_time.strftime('%Y-%m-%d'),
                                   current_time.strftime('%Y-%m-%d'))
            df = pd.concat([df,dff])
        current_time = current_time - datetime.timedelta(days=20)
    # Persist the newly fetched rows, skipping dates that are already cached.
    store_data = json.loads(df.to_json(orient='records'))
    for i in range(len(store_data)):
        www = collection.find_one({'stock':stock,'date':df.index[i],'ktype':'D'})
        if www is not None:
            continue
        store_data[i]['stock'] = stock
        store_data[i]['date'] = df.index[i]
        store_data[i]['ktype'] = 'D'
        collection.insert_one(store_data[i])
    # Assemble the result DataFrame from every cached bar in [start, end].
    www = collection.find_one()
    result = {w:[] for w in www.keys() if w != '_id'}
    for item in collection.find({'stock':stock,'ktype':'D','date':{'$lte':end,'$gte':start}}):
        for w in item.keys():
            if w == '_id':
                continue
            result[w].append(item[w])
    return pd.DataFrame(result)

def get_hist_data_sp(stock,date,ktype='D'):
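    """Return the single bar for `stock` on `date` at `ktype` resolution,
    fetching from tushare on a cache miss."""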
    logger = logging.getLogger(__name__)
    logger.debug("CALL get_hist_data_sp <%s,%s,%s>",stock,date,ktype)
    collection = db.hist_data
    ww = collection.find_one({'stock':stock,'date':date,'ktype':ktype})
    if ww is None:
        logger.debug("ReFetch... <%s,%s>",stock,date)
        # Cache miss: fetch from the requested date onward and store every
        # bar that is not already cached.
        df = ts.get_hist_data(stock,start=date,ktype=ktype)
        store_data = json.loads(df.to_json(orient='records'))
        for i in range(len(store_data)):
            www = collection.find_one({'stock':stock,'date':df.index[i],'ktype':ktype})
            if www is not None:
                continue
            store_data[i]['stock'] = stock
            store_data[i]['date'] = df.index[i]
            store_data[i]['ktype'] = ktype
            collection.insert_one(store_data[i])
    else:
        logger.debug("Use cache..")
    ww = collection.find_one({'stock':stock,'date':date,'ktype':ktype})
    if ww is None:
        # The date may be a non-trading day; fall back to an arbitrary
        # document just to obtain column names for an empty result.
        ww = collection.find_one()
    result = {w:[] for w in ww.keys() if w != '_id'}
    for item in collection.find({'stock':stock,'date':date,'ktype':ktype}):
        for w in item.keys():
            if w == '_id':
                continue
            result[w].append(item[w])
    return pd.DataFrame(result)

if __name__ == '__main__':
    #clear_database()
    stock = '002082'
    date = '2016-10-30'
    result = get_hist_data_day(stock,'2016-11-01','2017-01-01')
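    # Minimal usage sketch: print the cached range and look up one date.
    # (Assumes network access to the legacy free tushare `get_hist_data`
    # endpoint, which newer tushare releases have deprecated.)
    print(result.head())
    print(get_hist_data_sp(stock,date))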