股票池设计1——特征工程
from jqdatasdk import *
import seaborn as sns
import math
import datetime
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
1.1、财务数据——总市值和PE
def load_fundamentals_data(context):
'''
加载股票的财务数据,包括总市值和PE
'''
df = get_fundamentals(query(valuation,indicator), context.current_dt.strftime("%Y-%m-%d"))
raw_data = []
for index in range(len(df['code'])):
raw_data_item = {
'code' :df['code'][index],
'market_cap':df['market_cap'][index],
'pe_ratio' :df['pe_ratio'][index]
}
raw_data.append(raw_data_item)
return raw_data
df=get_fundamentals(query(valuation))
df = get_fundamentals(query(valuation,indicator))
raw_data = []
for index in range(len(df['code'])):
raw_data_item = {
'code' :df['code'][index],
'market_cap':df['market_cap'][index],
'pe_ratio' :df['pe_ratio'][index]
}
raw_data.append(raw_data_item)
# 按照财务信息中的总市值降序排序
raw_data = sorted(raw_data,key = lambda item:item['market_cap'], reverse=True)
# 剔除总市值排名最小的10%的股票
fitered_market_cap = raw_data[:int(len(raw_data) * 0.9)]
# 剔除PE TTM 小于0或大于100
filtered_pe = []
for stock in fitered_market_cap:
if stock['pe_ratio'] == None or math.isnan(stock['pe_ratio']) or float(stock['pe_ratio']) < 0 or float(stock['pe_ratio']) > 100:
continue
filtered_pe.append(stock['code'])
#PE数据集
df_pe=pd.DataFrame(fitered_market_cap)
1.2、行情数据——25日涨跌幅
current_dt=datetime.datetime.now().strftime('%Y-%m-%d')
CHANGE_PCT_DAY_NUMBER=25
def load_change_pct_data(current_dt,codes):
change_pct_dict_list = []
# 计算涨跌幅需要用到前一日收盘价,所以需要多加载一天的数据,
# 而这里在第二日的开盘前运行,计算前一个交易日收盘后的数据,所以需要再多加载一天
# 使用固定的25个交易日,而非25个bar计算涨跌幅
count = CHANGE_PCT_DAY_NUMBER + 1
# 获取25个交易日的日期
pre_25_dates = get_trade_days(start_date=None, end_date=current_dt, count=count)
pre_25_date = pre_25_dates[0]
pre_1_date = pre_25_dates[-1]
for code in codes:
pre_25_data = get_price(code, start_date=None, end_date=pre_25_date, frequency='daily', fields=['close'], skip_paused=True, fq='post', count=1)
pre_1_data = get_price(code, start_date=None, end_date=pre_1_date, frequency='daily', fields=['close'], skip_paused=True, fq='post', count=1)
pre_25_close = None
pre_1_close = None
if str(pre_25_date) == str(pre_25_data.index[0])[:10]:
pre_25_close = pre_25_data['close'][0]
if str(pre_1_date) == str(pre_1_data.index[0])[:10]:
pre_1_close = pre_1_data['close'][0]
if pre_25_close != None and pre_1_close != None and not math.isnan(pre_25_close) and not math.isnan(pre_1_close):
change_pct = (pre_1_close - pre_25_close) / pre_25_close
item = {'code':code, 'change_pct': change_pct}
change_pct_dict_list.append(item)
return change_pct_dict_list
def load_change_pct_data_1(current_dt,code):
change_pct_dict_list = []
# 计算涨跌幅需要用到前一日收盘价,所以需要多加载一天的数据,
# 而这里在第二日的开盘前运行,计算前一个交易日收盘后的数据,所以需要再多加载一天
# 使用固定的25个交易日,而非25个bar计算涨跌幅
count = CHANGE_PCT_DAY_NUMBER + 1
# 获取25个交易日的日期
pre_25_dates = get_trade_days(start_date=None, end_date=current_dt, count=count)
pre_25_date = pre_25_dates[0]
pre_1_date = pre_25_dates[-1]
pre_25_data = get_price(code, start_date=None, end_date=pre_25_date, frequency='daily', fields=['close'], skip_paused=True, fq='post', count=1)
pre_1_data = get_price(code, start_date=None, end_date=pre_1_date, frequency='daily', fields=['close'], skip_paused=True, fq='post', count=1)
pre_25_close = None
pre_1_close = None
if str(pre_25_date) == str(pre_25_data.index[0])[:10]:
pre_25_close = pre_25_data['close'][0]
if str(pre_1_date) == str(pre_1_data.index[0])[:10]:
pre_1_close = pre_1_data['close'][0]
if pre_25_close != None and pre_1_close != None and not math.isnan(pre_25_close) and not math.isnan(pre_1_close):
change_pct = (pre_1_close - pre_25_close) / pre_25_close
item = {'code':code, 'change_pct': change_pct}
change_pct_dict_list.append(item)
return change_pct_dict_list
# 加载标的的涨跌幅信息
change_pct_dict_list = load_change_pct_data(current_dt,filtered_pe)
# 按照涨跌幅升序排序
change_pct_dict_list = sorted(change_pct_dict_list,key = lambda item:item['change_pct'], reverse=False)
# 取跌幅前10%的股票
change_pct_dict_list = change_pct_dict_list[0:(int(len(change_pct_dict_list)*0.1))]
change_pct_dict_list=[]
for i in tqdm(filtered_pe):
change_pct_dict_list.append(load_change_pct_data_1(current_dt,i))
change_pct_dict_df1=[]
for i in change_pct_dict_list:
if i!=[]:
change_pct_dict_df1.append(i[0])
change_pct_dict_df=pd.DataFrame(change_pct_dict_df1)
1.3财务数据-资产负债率
##资产负债率ratio_liability_vs_assets=总负债total_liability/总资产total_assets
##合并资产负债表
q=query(finance.STK_BALANCE_SHEET.company_name,
finance.STK_BALANCE_SHEET.code,
finance.STK_BALANCE_SHEET.pub_date,
finance.STK_BALANCE_SHEET.start_date,
finance.STK_BALANCE_SHEET.end_date,
finance.STK_BALANCE_SHEET.cash_equivalents,
finance.STK_BALANCE_SHEET.total_assets,
finance.STK_BALANCE_SHEET.total_liability
).filter(finance.STK_BALANCE_SHEET.pub_date>='2019-01-01',finance.STK_BALANCE_SHEET.report_type==0)
df=finance.run_query(q)
df['ratio_liability_vs_assets']=df['total_liability']/df['total_assets']
df['rank_1']=df.sort_values(['pub_date', 'end_date'], ascending=[ False, False]).groupby(['code']).cumcount() + 1
df2=df[df['rank_1']==1]
df2['ratio_liability_vs_assets_seg']=pd.cut(df2['ratio_liability_vs_assets'],[0,0.3,0.4,0.6,0.8,1,10])
pd.value_counts(df2['ratio_liability_vs_assets_seg'])
(0.0, 0.3] 996
(0.4, 0.6] 920
(0.3, 0.4] 517
(0.6, 0.8] 470
(0.8, 1.0] 137
(1.0, 10.0] 8
Name: ratio_liability_vs_assets_seg, dtype: int64
df2['ratio_liability_vs_assets'].describe()
count 3048.000000
mean 0.422930
std 0.266916
min 0.009660
25% 0.251521
50% 0.402005
75% 0.562507
max 8.257912
Name: ratio_liability_vs_assets, dtype: float64
df3=df2[(df2['ratio_liability_vs_assets']<=1)]['ratio_liability_vs_assets']
sns.distplot(df3)
#格式转换
df2['zs1']=df2.ratio_liability_vs_assets_seg.astype(str)
#取资产负债率和25日跌幅
df3=pd.merge(df2,change_pct_dict_df,on='code',how='inner')
df4=df3[['company_name','code','ratio_liability_vs_assets','change_pct']]
# 资产负债率与25日跌幅相关性分析
def plot_corr(df,col_list,threshold=None,plt_size=None,is_annot=True):
"""
df:数据集
col_list:变量list集合
threshold: 相关性设定的阈值
plt_size:图纸尺寸
is_annot:是否显示相关系数值
return :相关性热力图
"""
corr_df = df.loc[:,col_list].corr()
plt.figure(figsize=plt_size)
sns.heatmap(corr_df,annot=is_annot,cmap='rainbow',vmax=1,vmin=-1,mask=np.abs(corr_df)<=threshold)
return plt.show()
# 剔除PE TTM 小于0或大于100
df_pe=df_pe[(df_pe.pe_ratio>=0)&(df_pe.pe_ratio<=100)]
#df4加入pe_ratio
df5=pd.merge(df4,df_pe,on='code',how='inner')
1.4财务数据——流动比率
##流动比率 current_ratio =流动资产合计 total_current_assets/流动负债合计 total_current_liability
##合并资产负债表
q=query(finance.STK_BALANCE_SHEET.company_name,
finance.STK_BALANCE_SHEET.code,
finance.STK_BALANCE_SHEET.pub_date,
finance.STK_BALANCE_SHEET.start_date,
finance.STK_BALANCE_SHEET.end_date,
finance.STK_BALANCE_SHEET.total_current_assets,
finance.STK_BALANCE_SHEET.total_current_liability
).filter(finance.STK_BALANCE_SHEET.pub_date>='2018-01-01',finance.STK_BALANCE_SHEET.report_type==0)
df_current=finance.run_query(q)
df_current['current_ratio']=df_current['total_current_assets']/df_current['total_current_liability']
#取最新一份财报数据
df_current['rank_1']=df_current.sort_values(['pub_date', 'end_date'], ascending=[ False, False]).groupby(['code']).cumcount() + 1
df_current=df_current[df_current.rank_1==1]
# df_current=df_current[['code','current_ratio']].dropna()
df_current=df_current[['code','current_ratio']]
#合并到特征表
df6=pd.merge(df5,df_current,on='code',how='left')
#相关性分析
plot_corr(df6,['ratio_liability_vs_assets','change_pct','market_cap','pe_ratio','current_ratio'])
df_dist=df6[df6.change_pct>0.2]['current_ratio'].dropna()
| company_name | code | ratio_liability_vs_assets | change_pct | market_cap | pe_ratio | current_ratio | |
|---|---|---|---|---|---|---|---|
| 0 | 兰州民百(集团)股份有限公司 | 600738.XSHG | 0.471485 | -0.230364 | 32.5629 | 6.0867 | 0.972715 |
| 1 | 山东沃华医药科技股份有限公司 | 002107.XSHE | 0.245668 | 0.221279 | 40.8015 | 40.0212 | NaN |
| 2 | 江西金力永磁科技股份有限公司 | 300748.XSHE | 0.464905 | -0.204534 | 128.5336 | 81.9638 | NaN |
| 3 | 青岛农村商业银行股份有限公司 | 002958.XSHE | 0.927440 | -0.069841 | 319.4444 | 11.3078 | NaN |
| 4 | 上海永冠众诚新材料科技(集团)股份有限公司 | 603681.XSHG | 0.319237 | -0.167230 | 36.5502 | 27.2604 | NaN |
| 5 | 厦门光莆电子股份有限公司 | 300632.XSHE | 0.479532 | -0.092742 | 44.6479 | 25.7679 | 2.763980 |
| 6 | 奥美医疗用品股份有限公司 | 002950.XSHE | 0.544276 | 1.079046 | 208.6398 | 62.8040 | NaN |
| 7 | 宁波热电股份有限公司 | 600982.XSHG | 0.410976 | -0.107203 | 29.1234 | 31.6918 | 2.441460 |
| 8 | 中航重机股份有限公司 | 600765.XSHG | 0.617861 | -0.055741 | 88.5990 | 27.0453 | 1.193113 |
| 9 | 昆山新莱洁净应用材料股份有限公司 | 300260.XSHE | 0.621686 | 0.023700 | 26.3128 | 49.3646 | 1.528724 |
| 10 | 云南鸿翔一心堂药业(集团)股份有限公司 | 002727.XSHE | 0.449485 | -0.112977 | 119.8563 | 19.8512 | 1.990085 |
| 11 | 西安蓝晓科技新材料股份有限公司 | 300487.XSHE | 0.478631 | -0.013625 | 80.9049 | 28.2361 | 2.688508 |
| 12 | 江苏新泉汽车饰件股份有限公司 | 603179.XSHG | 0.587382 | 0.042727 | 50.0095 | 27.0734 | 1.209574 |
| 13 | 苏州迈为科技股份有限公司 | 300751.XSHE | 0.580113 | 0.030505 | 104.0000 | 41.9587 | NaN |
| 14 | 上海瀚讯信息技术股份有限公司 | 300762.XSHE | 0.402701 | -0.108459 | 67.7469 | 62.4448 | NaN |
| 15 | 山东民和牧业股份有限公司 | 002234.XSHE | 0.572745 | -0.105084 | 83.9388 | 5.6186 | NaN |
| 16 | 深圳市飞荣达科技股份有限公司 | 300602.XSHE | 0.381029 | 0.210325 | 170.0924 | 47.3213 | 4.174113 |
| 17 | 杭州福斯特应用材料股份有限公司 | 603806.XSHG | 0.139437 | -0.053945 | 244.7336 | 24.5508 | 6.849038 |
| 18 | 内蒙古伊利实业集团股份有限公司 | 600887.XSHG | 0.411059 | -0.137787 | 1762.4631 | 25.0969 | NaN |
| 19 | 上海悦心健康集团股份有限公司 | 002162.XSHE | 0.582459 | -0.093400 | 28.1746 | 74.3437 | 0.691728 |
| 20 | 深圳市联得自动化装备股份有限公司 | 300545.XSHE | 0.490755 | -0.073194 | 47.6353 | 59.1575 | 1.943353 |
| 21 | 申万宏源集团股份有限公司 | 000166.XSHE | 0.795252 | -0.065642 | 1171.8694 | 20.9062 | NaN |
| 22 | 上海富瀚微电子股份有限公司 | 300613.XSHE | 0.130383 | -0.095772 | 76.0006 | 84.4895 | 8.161842 |
| 23 | 北京昭衍新药研究中心股份有限公司 | 603127.XSHG | 0.429196 | 0.033820 | 115.8378 | 90.7828 | 1.918646 |
| 24 | 凌源钢铁股份有限公司 | 600231.XSHG | 0.534481 | -0.141450 | 64.0120 | 18.7002 | 0.793143 |
| 25 | 广东广州日报传媒股份有限公司 | 002181.XSHE | 0.097676 | -0.161290 | 58.5173 | 69.7058 | 6.912332 |
| 26 | 青岛汇金通电力设备股份有限公司 | 603577.XSHG | 0.548940 | -0.092017 | 20.1170 | 39.6529 | 1.659044 |
| 27 | 石药集团新诺威制药股份有限公司 | 300765.XSHE | 0.281458 | -0.014568 | 71.7000 | 26.2908 | NaN |
| 28 | 东方财富信息股份有限公司 | 300059.XSHE | 0.605756 | 0.105098 | 1165.1581 | 75.0578 | 1.608532 |
| 29 | 中设设计集团股份有限公司 | 603018.XSHG | 0.625911 | 0.016099 | 49.9201 | 11.1978 | 1.443960 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 2126 | 山东好当家海洋发展股份有限公司 | 600467.XSHG | 0.497659 | -0.101710 | 32.2880 | 50.5187 | 0.697399 |
| 2127 | 福建福能股份有限公司 | 600483.XSHG | 0.504333 | -0.017453 | 132.6818 | 10.2428 | 1.695981 |
| 2128 | 天津天药药业股份有限公司 | 600488.XSHG | 0.307980 | 0.215105 | 55.4678 | 34.7858 | 1.644740 |
| 2129 | 烽火通信科技股份有限公司 | 600498.XSHG | 0.632396 | 0.098594 | 351.0612 | 42.1872 | 1.491932 |
| 2130 | 腾达建设集团股份有限公司 | 600512.XSHG | 0.568835 | 0.055225 | 48.9264 | 9.0598 | 2.277506 |
| 2131 | 南京栖霞建设股份有限公司 | 600533.XSHG | 0.775681 | -0.071714 | 32.6550 | 5.4489 | 2.137236 |
| 2132 | 卓郎智能技术股份有限公司 | 600545.XSHG | 0.584635 | -0.144033 | 102.3523 | 15.9659 | 2.370016 |
| 2133 | 江苏康缘药业股份有限公司 | 600557.XSHG | 0.296340 | -0.088373 | 80.7504 | 16.2524 | 1.642700 |
| 2134 | 四川大西洋焊接材料股份有限公司 | 600558.XSHG | 0.275038 | -0.119258 | 25.8510 | 77.7262 | 1.785506 |
| 2135 | 河北衡水老白干酒业股份有限公司 | 600559.XSHG | 0.465842 | -0.170685 | 84.9731 | 22.5027 | NaN |
| 2136 | 北京京能电力股份有限公司 | 600578.XSHG | 0.596164 | -0.110699 | 183.5112 | 12.9139 | 0.485902 |
| 2137 | 新疆八一钢铁股份有限公司 | 600581.XSHG | 0.794028 | -0.110823 | 47.8264 | 13.1655 | 0.238045 |
| 2138 | 天地科技股份有限公司 | 600582.XSHG | 0.446959 | -0.043739 | 126.6408 | 11.6222 | 2.126360 |
| 2139 | 安徽海螺水泥股份有限公司 | 600585.XSHG | 0.194822 | 0.060494 | 2907.1975 | 8.8328 | 2.582034 |
| 2140 | 云赛智联股份有限公司 | 600602.XSHG | 0.229199 | -0.051843 | 112.5595 | 40.8208 | 2.673322 |
| 2141 | 上海市北高新股份有限公司 | 600604.XSHG | 0.542533 | -0.158325 | 135.4399 | 67.1801 | 3.477707 |
| 2142 | 山西省国新能源股份有限公司 | 600617.XSHG | 0.849032 | -0.157553 | 43.4950 | 93.9236 | NaN |
| 2143 | 上海华谊集团股份有限公司 | 600623.XSHG | 0.543544 | -0.136203 | 121.4756 | 15.3251 | 1.142360 |
| 2144 | 上海申达股份有限公司 | 600626.XSHG | 0.631430 | 0.046837 | 55.2285 | 13.8919 | NaN |
| 2145 | 浙报数字文化集团股份有限公司 | 600633.XSHG | 0.126146 | 0.024541 | 130.5830 | 26.5269 | NaN |
| 2146 | 乐山电力股份有限公司 | 600644.XSHG | 0.525809 | -0.111629 | 26.5432 | 29.7453 | 0.541703 |
| 2147 | 上海外高桥集团股份有限公司 | 600648.XSHG | 0.612917 | -0.159757 | 166.6693 | 18.3664 | NaN |
| 2148 | 北京电子城投资开发集团股份有限公司 | 600658.XSHG | 0.508053 | -0.072935 | 52.5735 | 21.4715 | 3.305817 |
| 2149 | 上海强生控股股份有限公司 | 600662.XSHG | 0.486859 | -0.158337 | 40.8705 | 23.0654 | 0.917287 |
| 2150 | 天地源股份有限公司 | 600665.XSHG | 0.841852 | -0.086732 | 28.1704 | 8.9896 | 1.904141 |
| 2151 | 无锡市太极实业股份有限公司 | 600667.XSHG | 0.597682 | 0.308733 | 253.5853 | 38.5628 | 1.254031 |
| 2152 | 中华企业股份有限公司 | 600675.XSHG | 0.658932 | -0.138149 | 254.8185 | 6.9895 | 1.284974 |
| 2153 | 上海交运集团股份有限公司 | 600676.XSHG | 0.297939 | -0.188295 | 39.9055 | 18.4038 | 2.270599 |
| 2154 | 南京新街口百货商店股份有限公司 | 600682.XSHG | 0.359603 | -0.136468 | 122.6326 | 9.7458 | 0.750177 |
| 2155 | 中国石化上海石油化工股份有限公司 | 600688.XSHG | 0.277847 | 0.005398 | 432.9525 | 18.9153 | 1.818862 |
2156 rows × 7 columns

