Source code for tushare_easy.utils

"""tushare_easy utils

"""
from __future__ import print_function
import os
from warnings import warn
import arrow
import pandas as pd
import tushare as ts
from unipath import Path
from . import consts as CONSTS


[docs]def get_codes(index=False): if index: return CONSTS.index_prefix + \ ts.get_index()['code'].values return ts.get_stock_basics().index.values
[docs]def get_data(code, ktype='d', start=None, end=None): if start is None: start = '' if end is None: end = '' print(code, ktype) if code.startswith(CONSTS.index_prefix): code = code[1:] return ts.get_k_data(code, ktype=ktype, start=start, end=end, index=True, retry_count=CONSTS.retry_count, pause=CONSTS.pause) return ts.get_k_data(code, ktype=ktype, start=start, end=end, retry_count=CONSTS.retry_count, pause=CONSTS.pause)
[docs]def set_date_index(df, labels=CONSTS.index_labels_date): """ set date as df's index Parameters ---------- df : pandas.DataFrame data getting from `tushare.get_k_data` labels : list-like labels using to set as df index, sequenced preferred Returns ------- pandas.DataFrame data with index set by a label in labels """ for label in labels: if label in df.columns: df = df.set_index(label) df.index = pd.DatetimeIndex(df.index) return df else: raise Exception('COLUMNS NAME NOT FOUNDED IN %r' % labels)
[docs]def prep(df,): df.pop('code') df = df.drop(df.index[-1]) return set_date_index(df)
[docs]def get_demo(df): return df.head(1).append(df.tail(1))
[docs]def get_demo_new(df_last, df_newer): return df_last.head(1).append(df_newer.tail(1))
[docs]def read_data(filepath, header=0): """ read data from file always with index, with header default Parameters ---------- filepath : str header : int or None Returns ------- `pandas.DataFrame` """ return pd.read_table(filepath, parse_dates=True, index_col=0, header=header)
[docs]def save(df, filepath, mode='w', header=False): df.to_csv(filepath, header=header, sep='\t', mode=mode)
[docs]def fmt_filename(name_list): """ format filename Parameters ---------- name_list : list of str four elements list: [code, start, end, ktype] Returns ------- str filename """ return CONSTS.filename_sep.join(name_list)
[docs]def fmt_filename_demo(name_list): name_list.append(CONSTS.demo) return CONSTS.filename_sep.join(name_list)
[docs]def split_filename(filename): return filename.split(CONSTS.filename_sep)
[docs]def extract_start_end(df): """ format start and end time Parameters ---------- df : pd.DataFrame data with setting time as index time format: '%Y-%m-%d %H:%M' or '%Y-%m-%d' Returns ------- start, end : str datetime string, format: '%Y-%m-%d %H:%M' """ fmt_time = '%Y-%m-%d-%H-%M' start = df.index[0].strftime(fmt_time) end = df.index[-1].strftime(fmt_time) return start, end
[docs]def get_local(code, ktype='d', path='.'): """get last file and demo Parameters ---------- code : str ktype : {'d', '5', '15', '30', '60', 'w', 'm'} path : str where to search file Returns ------- filename_demo : filename of demo """ path = Path(path) pattern = CONSTS.filename_sep.join([ktype, CONSTS.demo]) pattern = '%s%s*%s%s' % (code, CONSTS.filename_sep, CONSTS.filename_sep, pattern) file_list = path.listdir(pattern, names_only=True) if len(file_list) == 1: return file_list[0] elif not file_list: # empty means False return warn('NO ITEM') else: raise Exception('MORE THAN 1 ITEM')
[docs]def get_arrow(strftime): """ get `arrow.arrow.Arrow` from str with local timezone info Parameters ---------- strftime : str format '%Y-%m-%d-%H-%M' Returns ------- `arrow.arrow.Arrow` """ time_tz = strftime + CONSTS.tz_local return arrow.get(time_tz, CONSTS.time_fmt)
[docs]def get_end_date(ktype='d'): """ get up-to-date time of market Parameters ---------- ktype : {'d', '5', '15', '30', '60', 'w', 'm'} Returns ------- end_datetime : `arrow.arrow.Arrow` """ today = arrow.utcnow() today = today.to(CONSTS.tz_local) start = today.shift(**CONSTS.datetime_shift_search[ktype]) date_fmt = 'YYYY-MM-DD' start = start.format(date_fmt) end = today.format(date_fmt) df = ts.get_k_data(CONSTS.code_std, index=True, ktype=ktype, start=start, end=end) if df.empty: raise Exception('NO DATA FOUNDED') df = prep(df) if df.empty: return df _, end_std = extract_start_end(df) return get_arrow(end_std)
[docs]def get_threshhold_datetime(time_str, ktype='d'): """ get threshhold of recent time locally Parameters ---------- time_str : str format '%Y-%m-%d-%H-%M' ktype : {'d', '5', '15', '30', '60', 'w', 'm'} Returns ------- threshhold_datetime : `arrow.arrow.Arrow` """ time_arrow = get_arrow(time_str) time_arrow = time_arrow.shift(**CONSTS.datetime_shift[ktype]) if ktype == '60': return time_arrow.ceil('hour') elif ktype == 'd': return time_arrow.ceil('day') elif ktype == 'w': return time_arrow.ceil('week') elif ktype == 'm': return time_arrow.ceil('month') elif ktype in ['5', '15', '30']: return time_arrow.ceil('minute') else: raise Exception('KTYPE ERROR')
[docs]def is_up_to_date(end_local_str, ktype='d',): """ check whether local data is up-to-date Parameters ---------- end_local_str : str format '%Y-%m-%d-%H-%M' ktype : {'d', '5', '15', '30', '60', 'w', 'm'} Returns ------- boolean """ # saved last date end_local = get_threshhold_datetime(end_local_str, ktype) end_std = get_end_date(ktype) if end_std > end_local: return False return True
[docs]def down2save(code, ktype='d', start=None, end=None, path='.'): cwd = Path(path) cwd.chdir() df = get_data(code, ktype, start, end) if df.empty: return df df = prep(df) if df.empty: return df df_demo = get_demo(df) # print(df_demo) start, end = extract_start_end(df_demo) name_list = [code, start, end, ktype] filename = fmt_filename(name_list) filename_demo = fmt_filename_demo(name_list) save(df, filename) save(df_demo, filename_demo, header=True)
[docs]def down2save_update(code, ktype='d', start=None, end=None, path='.'): """ get k_chart date and save them locally, and update them. get k_chart data using `tushare.get_k_data` save data locally with tab separated files could append data line by line if getting newer Parameters ---------- code : str ktype : format '%Y-%m-%d-%H-%M' start, end : str or None `datetime.datetime` format '%Y-%m-%d-%H-%M' e.g. '2017-12-12-23-59' path : str where to search files locally Returns ------- """ cwd = Path(path) cwd.chdir() filename_demo_local = get_local(code, ktype, path) if not isinstance(filename_demo_local, str): return down2save(code, ktype, start, end, path) df_demo_local = read_data(filename_demo_local) # print(df_demo_local) start_local, end_local = extract_start_end(df_demo_local) if is_up_to_date(end_local, ktype): print('data up to date') return name_list_demo_local = split_filename(filename_demo_local) filename_local = fmt_filename(name_list_demo_local[:-1]) df = get_data(code, ktype, start, end) if df.empty: return df df = prep(df) if df.empty: return df df_newer = df.loc[df.index > df_demo_local.index[-1]] if df_newer.empty: return df_newer df_demo_new = get_demo_new(df_demo_local, df_newer) # print(df_demo_new) _, end_newer = extract_start_end(df_demo_new) filename_list_new = [code, start_local, end_newer, ktype] filename_new = fmt_filename(filename_list_new) save(df_newer, filename_local, mode='a') os.rename(filename_local, filename_new) print('rename %s %s' % (filename_local, filename_new)) filename_demo_new = fmt_filename_demo(filename_list_new) save(df_demo_new, filename_demo_new, header=True) os.remove(filename_demo_local) print('remove %s' % filename_demo_local)