Mahalanobis Distance with the matrix libraries of MQL5, and beer. – Statistics – 18 December 2023
First, what is it?
Secondly some docs
Wikipedia page
We will need :
- Covariance matrix
- Matrix inversion
- Matrix multiplication
- Vector mean
The matrix and vector library that MQL5 has covers all of our needs, actually.
Okay so we will build a structure that we can prep once and reuse for as long as the sample set is not changing.
What do we mean by a sample set?
A set of observations with properties.
To simplify, let's say you have 10 candles (chart candles), and they have OHLC values. So you have 10 samples and 4 properties, or 4 features.
Here is an example usage
// Example usage: Mahalanobis distances over the last 10 completed bars,
// using OHLC as the 4 features (10 samples x 4 features).
double open[],high[],low[],close[];
ArrayResize(open,10,0);
ArrayResize(high,10,0);
ArrayResize(low,10,0);
ArrayResize(close,10,0);
// Shift i+1 skips the currently forming bar and reads completed bars only.
for(int i=0;i<10;i++){
   open[i]=iOpen(_Symbol,_Period,i+1);
   high[i]=iHigh(_Symbol,_Period,i+1);
   low[i]=iLow(_Symbol,_Period,i+1);
   close[i]=iClose(_Symbol,_Period,i+1);
   }
mahalanober M;
M.setup(4,10);          // 4 features (OHLC), 10 samples
M.fill_feature(0,open); // once all 4 features are filled the structure
M.fill_feature(1,high); // computes the inverse covariance matrix and
M.fill_feature(2,low);  // is ready to answer distance queries
M.fill_feature(3,close);
double md=M.distanceOfSampleToDistribution(2);
// FIX: the printed name was misspelled "Mahalabonis" in the original
Print("Mahalanobis Distance of bar 2 to the distribution "+DoubleToString(md,4));
md=M.distanceOfSampleToSample(5,0);
Print("Mahalanobis Distance of bar[0] to bar[5] in the distribution "+DoubleToString(md,4));
and here is the structure
//+------------------------------------------------------------------+
//| Mahalanobis distance calculator.                                 |
//| Prepare once with setup() + one fill_feature() per feature and   |
//| reuse for as long as the sample set does not change. The inverse |
//| covariance matrix is rebuilt automatically as soon as the last   |
//| feature is filled.                                               |
//+------------------------------------------------------------------+
struct mahalanober{
private:
   vector features[];                 // features[f][s] = value of feature f at sample s
   bool   filled[];                   // per-feature flag, true once fill_feature() succeeded
   vector feature_means;              // mean of each feature across all samples
   matrix covariance_matrix_inverse;  // (features x features) inverse covariance
   int    total_features,total_samples;
public:
   mahalanober(void){reset();}
  ~mahalanober(void){reset();}
   //--- release all state; safe to call repeatedly, the object is reusable after
   void reset(){
      total_features=0;
      total_samples=0;
      ArrayFree(features);
      ArrayFree(filled);
      feature_means.Init(0);
      covariance_matrix_inverse.Init(0,0);
      }
   //--- allocate storage for a dataset of _total_features x _total_samples
   void setup(int _total_features,int _total_samples){
      total_features=_total_features;
      total_samples=_total_samples;
      ArrayResize(features,total_features,0);
      ArrayResize(filled,total_features,0);
      ArrayFill(filled,0,total_features,false);
      feature_means.Init(total_features);
      for(int i=0;i<ArraySize(features);i++){
         features[i].Init(total_samples);
         }
      }
   //--- load one feature's values across all samples; returns true on success.
   //    When the last feature arrives the inverse covariance matrix is rebuilt.
   bool fill_feature(int which_feature_ix,double &values_across_samples[]){
      // FIX: also reject negative indices (original checked only the upper bound)
      if(which_feature_ix>=0&&which_feature_ix<ArraySize(features)){
         if(ArraySize(values_across_samples)==total_samples){
            for(int i=0;i<total_samples;i++){
               features[which_feature_ix][i]=values_across_samples[i];
               }
            feature_means[which_feature_ix]=features[which_feature_ix].Mean();
            filled[which_feature_ix]=true;
            if(all_filled()){
               calculate_inverse_covariance_matrix();
               }
            return(true);
            }else{
            Print("MHLNB::fill_feature::Amount of values does not match total samples");
            }
         }else{
         Print("MHLNB::fill_feature::Feature("+IntegerToString(which_feature_ix)+") does not exist");
         }
      return(false);
      }
   //--- Mahalanobis distance of one sample to the distribution:
   //    sqrt((x-mean)' * Cinv * (x-mean)); returns 0.0 on any error
   double distanceOfSampleToDistribution(int which_sample){
      if(all_filled()){
         // FIX: bounds check both ends (original allowed negative indices through)
         if(which_sample>=0&&which_sample<total_samples){
            matrix diff;
            diff.Init(total_features,1);
            for(int i=0;i<total_features;i++){
               diff[i][0]=features[i][which_sample]-feature_means[i];
               }
            return(mahalanobis_of_difference(diff));
            }else{
            // FIX: log prefix unified to MHLNB (original mixed MLHNB/MHLNB)
            Print("MHLNB::distanceOfSampleToDistribution()::Sample ("+IntegerToString(which_sample)+") does not exist returning 0.0");
            }
         }else{
         list_unfilled("distanceOfSampleToDistribution()");
         }
      return(0.0);
      }
   //--- Mahalanobis distance between two samples of the distribution:
   //    sqrt((a-b)' * Cinv * (a-b)); returns 0.0 on any error
   double distanceOfSampleToSample(int sample_a,int sample_b){
      if(all_filled()){
         if(sample_a>=0&&sample_a<total_samples){
            if(sample_b>=0&&sample_b<total_samples){
               matrix diff;
               diff.Init(total_features,1);
               for(int i=0;i<total_features;i++){
                  diff[i][0]=features[i][sample_a]-features[i][sample_b];
                  }
               return(mahalanobis_of_difference(diff));
               }else{
               Print("MHLNB::distanceOfSampleToSample()::Sample ("+IntegerToString(sample_b)+") does not exist returning 0.0");
               }
            }else{
            Print("MHLNB::distanceOfSampleToSample()::Sample ("+IntegerToString(sample_a)+") does not exist returning 0.0");
            }
         }else{
         list_unfilled("distanceOfSampleToSample()");
         }
      return(0.0);
      }
private:
   //--- shared quadratic form sqrt(diff' * Cinv * diff) for a (features x 1)
   //    difference column; deduplicates the matmul chain of both distance fns
   double mahalanobis_of_difference(matrix &diff){
      matrix transposed=diff.Transpose();
      matrix left=transposed.MatMul(covariance_matrix_inverse);
      matrix result=left.MatMul(diff);
      return(MathSqrt(result[0][0]));
      }
   //--- build the (samples x features) matrix, take its covariance
   //    (Cov(false): columns are the variables) and invert it
   void calculate_inverse_covariance_matrix(){
      matrix samples_by_features;
      samples_by_features.Init(total_samples,total_features);
      for(int f=0;f<total_features;f++){
         for(int s=0;s<total_samples;s++){
            samples_by_features[s][f]=features[f][s];
            }
         }
      matrix covariance_matrix=samples_by_features.Cov(false);
      covariance_matrix_inverse=covariance_matrix.Inv();
      }
   //--- true only when setup() has run and every feature has been filled
   bool all_filled(){
      if(total_features>0){
         for(int i=0;i<total_features;i++){
            if(!filled[i]){
               return(false);
               }
            }
         return(true);
         }
      return(false);
      }
   //--- report every feature that is still missing data, tagged with caller fx
   void list_unfilled(string fx){
      for(int i=0;i<total_features;i++){
         if(!filled[i]){
            Print("MHLNB::"+fx+"::Feature("+IntegerToString(i)+") is not filled!");
            }
         }
      }
};
If you see mistakes let me know
cheers
Comments are closed.