PostgreSQL 线性回归 - 股价预测 1

9 minute read

背景

本文是一个简单的一元线性回归的例子, 需要了解相关理论知识可参考 :

《在PostgreSQL中用线性回归分析(linear regression) - 实现数据预测》

数据来自股票的收盘价.

如果要做多元分析, 请自行增加维度(例如大盘指数,成交量,最低,最高价,价格区间等等都可以那里做回归计算), 计算期望值.

例子如下 :

按实际顺序录入test, 如时间从小到大, ID从小到大, 最终目标是用前一个值预测下一个值

第一条记录是最老的值, 最后一条记录是最新的值, 不要搞反了.

create table test(id int, cnt numeric);  

以下是网友提供的阿里巴巴的收盘价数据

格式 date,close,volume,open,high,low

vi ali.txt  
2015/3/3,81.58,39173430,82.95,83.25,80.03  
2015/3/2,84,11175870,85,85.02,83.75  
2015/2/27,85.12,8362081,85.95,86.56,85  
2015/2/26,85.37,8758019,86.92,87.16,85.2  
2015/2/25,86.19,13592080,84.38,86.83,84.36  
2015/2/24,84.69,15817560,85.53,85.53,83.88  
2015/2/23,85.47,9352529,86.51,86.68,85.25  
2015/2/20,86.64,7805009,87.25,87.29,86.38  
2015/2/19,86.89,7568275,86.81,87.87,86.71  
2015/2/18,86.74,7415961,87.1,87.43,86.5  
2015/2/17,86.85,15133130,88.78,88.99,86.7  
2015/2/13,89.05,14587390,88.2,89.3,87.65  
2015/2/12,87.1,15143910,85.6,88.3,85.55  
2015/2/11,86,12281340,87.58,87.7,85.82  
2015/2/10,87.26,12026260,87.01,87.47,86.52  
2015/2/9,86,12088070,85.83,86.75,85.47  
2015/2/6,85.68,17597490,87.11,87.4,85.42  
2015/2/5,87,28867570,89.58,89.84,86.1  
2015/2/4,90,14599400,90.99,91.88,89.48  
2015/2/3,90.61,13475970,91.65,91.65,89.9  
2015/2/2,90.13,18845140,91.13,91.66,88.61  
2015/1/30,89.08,36747370,89.6,92,88.11  
2015/1/29,89.81,76430640,90.53,90.74,87.36  
2015/1/28,98.45,42012700,100.3,101.49,97.79  
2015/1/27,102.94,15591560,102.89,103.57,100.58  
2015/1/26,103.99,10656340,104.4,105.13,103.33  
2015/1/23,103.11,9860504,104.02,105.2,103.02  
2015/1/22,104,11400180,104.6,104.92,103.1  
2015/1/21,103.29,15141470,100.75,103.86,100.32  
2015/1/20,100.04,12049470,98.3,100.21,97.59  
2015/1/16,96.89,13313120,96.09,97.8,95.52  
2015/1/15,96.31,18182200,99.67,100.14,96.02  
2015/1/14,99.58,17785990,99.28,100.18,98.06  
2015/1/13,100.77,11275790,102.59,102.85,100.01  
2015/1/12,101.62,7946740,103.2,103.36,101.21  
2015/1/9,103.02,10193410,105.24,105.3,102.89  
2015/1/8,105.03,12904640,102.95,105.3399,102.68  
2015/1/7,102.13,11000010,104.59,104.74,102.03  
2015/1/6,103.32,15662710,101.25,103.85,100.11  
2015/1/5,101,18313750,102.76,103.02,99.9  
2015/1/2,103.6,12286400,104.24,104.72,102.52  
2014/12/31,103.94,10254350,106.46,106.47,103.69  
2014/12/30,105.75,10185890,105.64,106.71,105.13  
2014/12/29,105.98,8059191,105.95,107.66,105.64  
2014/12/26,105.95,6523956,105.99,106.94,105.5  
2014/12/24,105.95,5870358,105.68,107.21,105.6  
2014/12/23,105.52,19077270,108.3,108.47,103.88  
2014/12/22,108.77,12985130,110.63,110.98,108.53  
2014/12/19,110.65,14785340,109.93,110.65,108.04  
2014/12/18,109.25,22749890,110.58,111.2,108.26  
2014/12/17,109.02,17242940,107.11,109.19,106.28  
2014/12/16,105.77,21655360,103.75,107.68,103.7  
2014/12/15,104.7,16476750,106.39,107.77,103.7  
2014/12/12,105.11,14504050,104.7,107.45,104.179  
2014/12/11,104.97,15658440,104.44,106.84,104.29  
2014/12/10,103.88,18398440,107.09,107.38,103.51  
2014/12/9,107.48,24805130,102.27,107.95,101.2  
2014/12/8,105.07,19166550,105.97,107.4,104.2101  
2014/12/5,107.9,12125090,109.6,110.35,107.76  
2014/12/4,109.17,10790400,110.1,110.5,108.91  
2014/12/3,110.64,16164860,110.4,111.68,108.798  

导入数据库

create table tmp(c1 text,c2 numeric,c3 numeric,c4 numeric,c5 numeric,c6 numeric);  
copy tmp from '/home/postgres/ali.txt' with (format csv);  

调换一下顺序

insert into test select row_number() over(order by ctid desc), c2 from tmp;  

校验函数

CREATE OR REPLACE FUNCTION public.check_predict(  
IN ov integer,   --  校验哪条记录  
OUT rv numeric,  --  真实值  
OUT pv numeric,  --  预测值  
OUT dev numeric, --  误差  
OUT zv numeric,  --  自变量  
OUT v_slope numeric, --  斜率  
OUT v_inter numeric, --  截距  
OUT v_r2  numeric  --  相关性  
)  
 RETURNS record  
 LANGUAGE plpgsql  
AS $function$  
declare   
  r2_1 numeric := 0; -- 相关性  
  r2_2 numeric := 0; -- 最大相关性  
  var int;           --  样本起点  
  inter_1 numeric;  --  截距  
  slope_1 numeric;  --  斜率  
  inter_2 numeric;  --  最大相关性截距  
  slope_2 numeric;  --  最大相关性斜率  
  realv numeric;    --  真实数据  
  predicv numeric;  --  预测数据  
  offset_var int := ov;   -- 倒数第?个值的预测值, 不停迭代, 最后计算所有的实际值和预测值的corr, 选择最佳相关?  
  v_cnt int8 := 0;  
  v_lmt int := 90;  --  使用的最大样本集, 影响预测准确度  
  v_min int := 5;   --  使用的最小样本数, 影响预测准确度  
  zcnt numeric := 0;    --  自变量  
begin  
  select count(*) into v_cnt from test;  
  
  --  自变量 t1  
  --  因变量 t2  
  
  --  筛选最大相关度的样本数, 并记录下储斜率, 截距.  
  for i in 0..v_lmt   
  loop  
    with t1 as (select row_number() over(order by id desc) as rn,* from (select id,lag(cnt,1) over(order by id) as zbl_cnt,cnt from test order by id desc offset offset_var limit v_lmt+v_min) t  
                where t.zbl_cnt is not null  
           )  
      select regr_intercept(t1.cnt,t1.zbl_cnt), regr_slope(t1.cnt,t1.zbl_cnt), regr_r2(t1.cnt,t1.zbl_cnt)   
        into inter_1,slope_1,r2_1  
      from t1  
      where t1.rn<=i+v_min;  
  
    if r2_1>r2_2 then   
      inter_2 := inter_1;  
      slope_2 := slope_1;  
      r2_2 := r2_1;  
      var := i+v_min;  
    end if;  
  end loop;  
  
  raise notice 'offset:%, 最大相关度样本数:%, 截距%, 斜率%, 相关性%', offset_var, var, round(inter_2,4), round(slope_2,4), round(r2_2,4);  
  select slope_2*cnt+inter_2, cnt into predicv,zcnt from test order by id desc offset offset_var limit 1;  
  
  --  offset_var=0的自变量, 表示最近一个值, 所以真实值还不存在, 那么realv=NULL  
  if  offset_var=0 then  
    realv := NULL;  
  else  
    select cnt into realv from test order by id desc offset offset_var-1 limit 1;  
  end if;  
  
  raise notice '自变量%, 真实数据%, 预测数据%, 本次预测偏差%%%', zcnt, realv, round(predicv,2), abs(1-round(predicv/realv,4))*100;  
  
  rv := realv;  
  pv := round(predicv,2);  
  dev := abs(1-round(predicv/realv,4));  
  zv  := zcnt;  
  v_slope := slope_2;  
  v_inter := inter_2;  
  v_r2 := r2_2;  
  
  return;  
end;  
$function$;  

预测结果偏差较大, 看dev这列显示偏差

(如果你的数据包含今天的收盘价格的数据的话, 第一行就是预测的明天的收盘价格)

postgres=# select (check_predict(i)).* from generate_series(0,50) t(i);  
   rv   |   pv   |  dev   |   zv   |      v_slope       |      v_inter       |        v_r2          
--------+--------+--------+--------+--------------------+--------------------+--------------------  
        |  81.50 |        |  81.58 |  0.930196213911424 |   5.61421408650552 |  0.461071168376043  
  81.58 |  84.90 | 0.0407 |     84 |  0.560745221248429 |   37.8015394128117 |   0.23571694348864  
     84 |  85.84 | 0.0219 |  85.12 |  0.416036492272315 |    50.428211921701 |  0.160707728559778  
  85.12 |  86.06 | 0.0110 |  85.37 |   0.31332110091768 |   59.3087633551948 | 0.0916611831863153  
  85.37 |  86.40 | 0.0121 |  86.19 |  0.223065327998904 |   67.1752208792373 | 0.0892441401437571  
  86.19 |  85.51 | 0.0078 |  84.69 |  0.509756185614852 |    42.343562005006 |  0.338718526733452  
  84.69 |  85.97 | 0.0151 |  85.47 |   0.60492702580558 |   34.2628366224153 |  0.408610576552954  
  85.47 |  86.86 | 0.0163 |  86.64 |  0.641095276611868 |   31.3167837973076 |  0.422562923551744  
  86.64 |  87.06 | 0.0049 |  86.89 |    0.6396258966744 |   31.4865883689919 |  0.442022365274712  
  86.89 |  87.31 | 0.0048 |  86.74 |  0.314176461801723 |   60.0558838893752 |  0.345972028763493  
  86.74 |  87.07 | 0.0038 |  86.85 |  0.556953610508981 |   38.6967242480077 |  0.709038352162874  
  86.85 |  88.54 | 0.0194 |  89.05 |   0.71646536311035 |   24.7355217235248 |  0.806474667992637  
  89.05 |  86.82 | 0.0250 |   87.1 |  0.828893551212771 |   14.6233989516049 |  0.843205122436764  
   87.1 |  85.63 | 0.0169 |     86 |  0.876681721784925 |   10.2362647858593 |  0.868757340937373  
     86 |  86.79 | 0.0092 |  87.26 |  0.907607969680494 |   7.59324024469571 |  0.872354617595825  
  87.26 |  85.35 | 0.0219 |     86 |  0.963046324742614 |   2.52920153372814 |  0.854052902081886  
     86 |  85.04 | 0.0111 |  85.68 |  0.984857752501153 |  0.658717160003272 |  0.818153667442131  
  85.68 |  86.65 | 0.0113 |     87 |  0.968673034113725 |    2.3751701572406 |  0.782575372168168  
     87 |  90.00 | 0.0345 |     90 |  0.910991684444145 |   8.01456054235934 |  0.742602997511869  
     90 |  90.69 | 0.0076 |  90.61 |  0.897821640533335 |   9.33440206466203 |  0.713122343798605  
  90.61 |  90.13 | 0.0053 |  90.13 |  0.909958318693295 |   8.11836657246885 |  0.676080976394243  
  90.13 |  88.54 | 0.0177 |  89.08 |  0.959429536023691 |   3.07240205281187 |  0.608007093767562  
  89.08 |  89.76 | 0.0076 |  89.81 |   0.91029848144619 |   8.00500306348446 |  0.404654669040077  
  89.81 |  99.27 | 0.1054 |  98.45 |  0.657207082561843 |   34.5727385355619 |  0.393214748683796  
  98.45 | 102.53 | 0.0415 | 102.94 |  0.704409588700779 |   30.0196224514047 |  0.502462865561183  
 102.94 | 103.45 | 0.0049 | 103.99 |  0.703297557503838 |   30.3100075437914 |   0.46367875870034  
 103.99 | 102.47 | 0.0147 | 103.11 |   0.62837962126522 |   37.6738507164757 |  0.403487822887036  
 103.11 | 103.10 | 0.0001 |    104 |  0.643348564991283 |   36.1926878205066 |  0.412471976813411  
    104 | 102.47 | 0.0147 | 103.29 |  0.609715827989882 |   39.4938840550046 |  0.434204693264911  
 103.29 | 100.19 | 0.0300 | 100.04 |  0.714689142171646 |   28.6965783703781 |  0.579882468389968  
 100.04 |  96.87 | 0.0317 |  96.89 |  0.891706172449747 |   10.4761802869589 |  0.673676693300631  
  96.89 |  95.65 | 0.0128 |  96.31 |  0.996480964507578 | -0.324693104249417 |  0.614510491219348  
  96.31 |  99.98 | 0.0381 |  99.58 |  0.805484441868333 |   19.7717089277751 |  0.512838450280928  
  99.58 | 101.72 | 0.0215 | 100.77 |  0.580782945386486 |   43.1942330487114 |  0.411626746536197  
 100.77 | 102.31 | 0.0153 | 101.62 |   0.60960923807431 |   40.3630398853694 |  0.534574960836081  
 101.62 | 103.28 | 0.0163 | 103.02 |  0.715901485259652 |   29.5241464649391 |  0.578753862361246  
 103.02 | 104.93 | 0.0185 | 105.03 |  0.755513345311285 |   25.5752266330287 |  0.637919333414959  
 105.03 | 102.17 | 0.0272 | 102.13 |   0.85397742981656 |   14.9562551932185 |  0.706505426646503  
 102.13 | 103.39 | 0.0123 | 103.32 |  0.816331012523094 |   19.0456461040167 |  0.678069331200632  
 103.32 |  97.81 | 0.0533 |    101 |   1.59944623063686 |  -63.7322919498157 |  0.754678672545085  
    101 | 103.53 | 0.0251 |  103.6 |  0.863779780883968 |   14.0441886225418 |  0.661109144730258  
  103.6 | 103.94 | 0.0033 | 103.94 |  0.838387460079692 |   16.7970520107185 |  0.577059699997509  
 103.94 | 105.92 | 0.0191 | 105.75 | -0.133229605047718 |   120.010159783732 |  0.804055392782976  
 105.75 | 105.93 | 0.0017 | 105.98 | -0.145112549418295 |   121.311379359051 |  0.969009451994014  
 105.98 | 105.95 | 0.0003 | 105.95 |                  0 |   105.949999994131 |                  1  
 105.95 | 105.89 | 0.0005 | 105.95 |  -0.13230769230761 |   119.911107692316 |   0.99999999999768  
 105.95 |  99.90 | 0.0571 | 105.52 |   1.72872340425854 |  -82.5132446808904 |   1.00000000000077  
 105.52 | 111.29 | 0.0547 | 108.77 |  -1.34285714286032 |   257.357142857695 |   1.00000000000725  
 108.77 | 119.17 | 0.0956 | 110.65 |   6.08695652160159 |  -554.349999972815 |   1.00000000000081  
 110.65 | 109.27 | 0.0125 | 109.25 | 0.0707692307696992 |    101.53473846147 |  0.999999999967684  
 109.25 | 118.89 | 0.0883 | 109.02 |   3.03738317759831 |  -212.244018694157 |   1.00000000001163  
(51 rows)  

贵州茅台的结果

预测偏差还比较理想(可能有些股票受到的外界因素影响较大, 有些比较小, 因为本文没有引入多元分析, 就这样吧)

postgres=# select (check_predict(i)).* from generate_series(0,50) t(i);  
   rv   |   pv   |  dev   |   zv   |      v_slope      |      v_inter      |       v_r2          
--------+--------+--------+--------+-------------------+-------------------+-------------------  
        | 155.68 |        | 154.75 | 0.749327368763041 |  39.7193487454018 | 0.443299946747894  
 154.75 | 156.33 | 0.0102 | 155.00 | 0.658983381690956 |  54.1888692212598 |  0.31605703462515  
 155.00 | 158.61 | 0.0233 | 157.81 | 0.505691375378367 |  78.8096738403756 | 0.240429553874766  
 157.81 | 159.35 | 0.0097 | 159.02 | 0.471362121189687 |  84.3914502631931 |  0.23632158869318  
 159.02 | 159.89 | 0.0055 | 159.87 | 0.502631474677655 |  79.5350129389031 | 0.262084379639135  
 159.87 | 160.46 | 0.0037 | 161.00 | 0.486478989379233 |  82.1377272523488 |  0.29236983925612  
 161.00 | 161.73 | 0.0045 | 162.74 | 0.592859008065614 |  65.2441140525351 |  0.38051931752417  
 162.74 | 161.22 | 0.0094 | 162.33 | 0.522125410976863 |  76.4612098979163 | 0.450724600316921  
 162.33 | 161.07 | 0.0078 | 161.50 | 0.670999115633002 |  52.7013374696206 | 0.535249445391989  
 161.50 | 160.77 | 0.0045 | 160.92 |  0.72782591656929 |  43.6501486130006 | 0.574758659704234  
 160.92 | 156.66 | 0.0264 | 156.49 | 0.885275616598876 |  18.1277330183775 | 0.655113491671201  
 156.49 | 157.82 | 0.0085 | 156.70 | 0.757494701869893 |    39.12388483696 | 0.495319263616962  
 156.70 | 161.18 | 0.0286 | 160.71 | 0.725555251667536 |  44.5773888438119 | 0.584398418586498  
 160.71 | 160.13 | 0.0036 | 159.25 | 0.718340613774721 |  45.7320036637828 | 0.626975960572309  
 159.25 | 158.92 | 0.0021 | 158.02 | 0.776715149169011 |  36.1819821709587 | 0.676670415905896  
 158.02 | 159.06 | 0.0066 | 158.47 | 0.812642712296025 |  30.2825991865648 | 0.679920408166994  
 158.47 | 160.77 | 0.0145 | 160.50 | 0.824536957368669 |  28.4277502764865 | 0.708045226118134  
 160.50 | 161.95 | 0.0090 | 161.90 | 0.843561194042496 |  25.3766280000652 | 0.722573245190375  
 161.90 | 161.51 | 0.0024 | 161.49 | 0.847987413256958 |  24.5663395825434 | 0.721633674185219  
 161.49 | 163.60 | 0.0131 | 163.87 | 0.870581601947346 |  20.9406294921026 | 0.725206713695765  
 163.87 | 163.51 | 0.0022 | 163.70 | 0.833749346623708 |  27.0240967784583 | 0.670383837164832  
 163.70 | 168.62 | 0.0301 | 167.65 |  1.05254359115864 | -7.83507005049674 | 0.735574587910647  
 167.65 | 165.25 | 0.0143 | 165.70 | 0.883481284376093 |  18.8595907684336 | 0.566222610315986  
 165.70 | 163.61 | 0.0126 | 164.70 | 0.760665237060595 |  38.3314388491422 | 0.437037290387292  
 164.70 | 161.46 | 0.0197 | 162.58 |  0.63926420779486 |  57.5312362089702 | 0.386820980329109  
 162.58 | 158.80 | 0.0232 | 158.81 | 0.717636038072266 |  44.8353807921515 | 0.569066830110592  
 158.81 | 158.34 | 0.0030 | 158.20 | 0.761022139545313 |  37.9453143278904 | 0.597076723252649  
 158.20 | 156.01 | 0.0138 | 156.00 | 0.890859569629593 |  17.0349264413836 | 0.660262576219616  
 156.00 | 155.91 | 0.0006 | 156.00 | 0.928603251444626 |   11.051262047026 | 0.569697453056979  
 156.00 | 158.28 | 0.0146 | 157.72 | 0.723658951735034 |  44.1435343246399 |  0.44078135697188  
 157.72 | 159.30 | 0.0100 | 158.50 | 0.593435970103012 |  65.2440679649604 | 0.394699579585899  
 158.50 | 159.91 | 0.0089 | 159.27 | 0.632807705774723 |  59.1184436985211 | 0.387382904311161  
 159.27 | 160.63 | 0.0085 | 160.00 | 0.604419927649386 |  63.9233718732132 | 0.330175855885006  
 160.00 | 162.13 | 0.0133 | 162.00 | 0.479607769753778 |  84.4368575468047 | 0.257868311166715  
 162.00 | 159.99 | 0.0124 | 158.74 | 0.624745284091551 |  60.8132213491609 | 0.426844197393141  
 158.74 | 160.57 | 0.0115 | 159.75 |   0.6106793906001 |  63.0161876923878 | 0.337283942825567  
 159.75 | 161.95 | 0.0138 | 162.00 | 0.453875422780405 |  88.4214116897916 | 0.235385451808691  
 162.00 | 162.13 | 0.0008 | 162.20 | 0.514218487026421 |  78.7230165782795 | 0.291003288858034  
 162.20 | 162.09 | 0.0007 | 162.02 | 0.556324729871488 |  71.9532675373276 | 0.326799171637252  
 162.02 | 162.16 | 0.0009 | 162.04 | 0.579222185293716 |  68.3028288619664 | 0.344557168377077  
 162.04 | 162.16 | 0.0007 | 161.81 | 0.556599549002475 |  72.0969606318282 | 0.307924224254188  
 161.81 | 162.66 | 0.0052 | 162.48 | 0.500693143269678 |   81.305630489949 | 0.298678253677398  
 162.48 | 162.22 | 0.0016 | 161.96 | 0.568957052943135 |  70.0759551450734 | 0.346462148633283  
 161.96 | 162.89 | 0.0057 | 163.10 |  0.39281014679101 |  98.8215502236369 | 0.340803249529129  
 163.10 | 164.15 | 0.0065 | 164.80 | 0.640630057919025 |  58.5763291969098 | 0.536313525971144  
 164.80 | 163.63 | 0.0071 | 164.00 |   0.7153108267714 |  46.3161409479079 | 0.611327694763799  
 164.00 | 162.73 | 0.0078 | 163.00 | 0.671618352789207 |  53.2545511273858 | 0.679718050633678  
 163.00 | 160.66 | 0.0144 | 160.17 | 0.646714009825995 |  57.0713176290233 | 0.804063835532055  
 160.17 | 159.11 | 0.0066 | 158.29 | 0.769358075679891 |  37.3298385738639 | 0.848976826898375  
 158.29 | 161.29 | 0.0190 | 160.74 | 0.845694258048787 |  25.3560723887127 | 0.887140705169265  
 160.74 | 165.13 | 0.0273 | 164.30 | 0.928055617122675 |  12.6470087942632 | 0.910309053795291  
(51 rows)  

Flag Counter

digoal’s 大量PostgreSQL文章入口