;===========================================

;MatMate-Listing vom:08.10.2011 12:50:26

;============================================

[1] // MO-Problem:

[2] //    Express N rowvectors optimally by m rowvector components (least squares)

[3] //    Proposed solution: use PCA on rows of datamatrix

//  referring to: (the reference that followed here is missing from this listing)

[4] // -------------------------------------------------------------

[5] // 1) generate random data: a v x n matrix Z whose rows are the vectors to be approximated

[6] n=6   // number of columns of Z (length of each rowvector)

[7] v=20   // number of rowvectors; this is big-N in the problem description

[8] m=3   // number of components kept for the approximation

// NOTE(review): presumably fixes the start value of the random generator so that
//               the numbers below are reproducible -- confirm against the MatMate docs
[9]          set randomstart=41

[10] Z = randomn(v,n)        // generate normally distributed random data (v rows, n columns)

[11] Z = zvaluezl(abwzl(Z))  // center and standardize Z rowwise: afterwards each row has mean 0 and mean square 1

Z :

 1.91838 0.240082 -0.118702 -0.111731 -0.529889 -1.39814
 1.74008 0.287299 0.585276 -1.29584 -0.678583 -0.638233
 1.69592 -1.45651 0.399531 -0.603951 0.471103 -0.506085
 0.67544 -0.932264 0.744144 -0.489866 -1.39173 1.39428
 -0.37183 0.518629 -0.488861 -1.75059 1.26953 0.823114
 -0.937607 2.04641 -0.405767 -0.843071 -0.084732 0.224772
 -1.98645 0.045982 -0.087153 0.040762 0.807935 1.17893
 1.68112 0.18502 0.361601 -0.635362 0.021527 -1.61391
 0.528903 -0.23346 1.34817 -1.11442 0.845862 -1.37506
 0.459548 1.57271 -1.15695 -1.33151 0.451568 0.00463
 -1.14844 -0.568889 -0.228503 1.98691 -0.442786 0.4017
 1.14818 -0.315287 -0.131685 0.361329 -1.92229 0.859752
 1.24579 1.23955 0.280134 -1.1521 -0.48713 -1.12624
 -1.68109 1.3255 0.603783 -0.765929 0.666159 -0.148422
 1.53767 1.21297 -0.960227 -0.763202 -0.256514 -0.770693
 1.49842 -0.476369 0.066837 -1.75727 0.659758 0.00862
 0.880739 1.11057 0.980774 -1.17588 -0.76932 -1.02688
 -0.835973 -0.119279 1.52519 -0.658294 -1.07909 1.16744
 -0.992609 -0.071102 1.9724 -0.872811 0.404471 -0.44035
 -0.597897 -0.107142 2.01012 -1.04083 0.353781 -0.618027

[12] //-------------------------------------------------------

[13] // 2) find principal components of rowvectors;

[14] //      note: because the rowvectors were centered initially, there are

[15] //            at most n-1 independent components!

[16]   // recenter the data matrix columnwise

[17]   ME = meansp(Z)  // rowvector of the n column means of Z

ME :

 0.322915 0.27522 0.365006 -0.698682 -0.084519 -0.17994

[18]   C = Z - ME     // recentered data: ME is subtracted from every row of Z, so column-means = 0

C :

 1.59547 -0.035138 -0.483707 0.586952 -0.445371 -1.2182
 1.41717 0.012079 0.22027 -0.597159 -0.594064 -0.458293
 1.373 -1.73173 0.034525 0.094731 0.555622 -0.326145
 0.352524 -1.20748 0.379138 0.208816 -1.30722 1.57422
 -0.694746 0.243409 -0.853866 -1.0519 1.35405 1.00305
 -1.26052 1.77119 -0.770773 -0.144388 -0.000213 0.404712
 -2.30937 -0.229238 -0.452159 0.739445 0.892454 1.35887
 1.35821 -0.0902 -0.003405 0.06332 0.106045 -1.43397
 0.205988 -0.50868 0.983165 -0.415737 0.93038 -1.19512
 0.136633 1.29749 -1.52196 -0.632827 0.536087 0.18457
 -1.47135 -0.844109 -0.593509 2.6856 -0.358268 0.58164
 0.825266 -0.590507 -0.496691 1.06001 -1.83777 1.03969
 0.922876 0.964327 -0.084872 -0.453417 -0.402611 -0.946303
 -2.004 1.05028 0.238777 -0.067247 0.750678 0.031518
 1.21475 0.937747 -1.32523 -0.06452 -0.171996 -0.590753
 1.17551 -0.751589 -0.298169 -1.05858 0.744276 0.18856
 0.557824 0.835345 0.615769 -0.477195 -0.684801 -0.846941
 -1.15889 -0.394499 1.16019 0.040388 -0.994571 1.34738
 -1.31552 -0.346322 1.60739 -0.174129 0.48899 -0.26041
 -0.920812 -0.382362 1.64512 -0.342153 0.438299 -0.438087

[19]   // get the required rotation-matrix T first

// NOTE(review): gettrans(C',"pca") presumably returns the rotation that brings C'
//               into principal-component position; T is its transpose -- confirm
//               against the MatMate documentation
[20]   T = gettrans(C',"pca")'

[21] PC_n = T * C    // rotated data: the first n-1 rows contain the principal components, all further rows are zero

PC_n :

 5.38898 -0.0154 -0.580167 -1.22538 -0.773942 -2.7941
 -1.00277 3.01291 -0.668002 -2.31587 2.44816 -1.47443
 0.411061 1.7402 -3.47125 1.08172 -1.09449 1.33276
 -0.611711 1.68428 1.15726 0.992924 -1.87814 -1.34461
 -0.296343 -0.610486 -0.768131 2.00798 1.32266 -1.65568
 0 0 -0 0 -0 0
 -0 0 0 0 -0 -0
 -0 -0 -0 0 0 -0
 -0 -0 0 0 -0 0
 0 -0 -0 -0 -0 -0
 0 0 -0 0 0 0
 0 0 0 0 -0 -0
 0 0 0 -0 0 -0
 0 -0 -0 -0 0 -0
 -0 0 -0 -0 0 0
 0 -0 0 -0 0 0
 0 -0 -0 0 -0 0
 -0 0 -0 0 -0 -0
 0 -0 0 -0 0 -0
 -0 -0 0 0 0 0

[22] // 3) now build a new matrix PC_m from PC_n in which all rowvectors with index k>m are set to zero

//    in the above table these are the rowvectors that were shaded, i.e. all rows below the first m

[23] PC_m = { PC_n[1..m,*], Null(v-m,n) }  // only the first m (=3) rowvectors are nonzero

[24] // 4) reverse the rotation, using only the first m components

[25]    C_m = T' * PC_m   // approximation of the column-centered data C built from m components

C_m :

 1.75596 -0.100569 -0.436641 -0.076575 -0.541828 -0.600352
 1.39155 -0.188951 0.015763 -0.264869 -0.247687 -0.705805
 1.19979 -0.943564 0.652155 0.0427 -0.463334 -0.487745
 0.170699 -1.23948 0.22696 1.07412 -1.03185 0.799545
 -1.02039 0.950986 -0.413904 -0.320935 0.65011 0.154136
 -1.17144 1.36559 -1.08864 -0.117363 0.524211 0.487646
 -2.38773 0.222993 -0.083633 0.613518 0.281759 1.3531
 1.47461 -0.068471 0.095177 -0.491965 -0.07219 -0.937157
 0.241433 -0.327815 1.1755 -0.771353 0.603388 -0.921155
 0.033387 1.50268 -1.4003 -0.380582 0.342868 -0.098055
 -1.22944 -0.711087 -0.306785 1.43752 -0.866202 1.67599
 0.836085 -0.847704 -0.728997 1.28589 -1.44863 0.903364
 1.04495 0.527917 -0.409247 -0.544182 0.12917 -0.748603
 -1.88762 0.815493 0.098398 -0.347871 0.973933 0.347664
 1.25666 0.927649 -1.30644 -0.245231 -0.20811 -0.424526
 0.857861 -0.042958 0.148162 -0.36532 0.028768 -0.626513
 0.717834 0.154887 0.089592 -0.48013 0.181929 -0.664112
 -1.19219 -0.86487 0.699611 0.696104 -0.217129 0.87847
 -1.23716 -0.547944 1.47232 -0.316453 0.707459 -0.078213
 -0.854834 -0.584786 1.50095 -0.427022 0.673366 -0.307678

[26] Z_m = C_m + ME   // add the column means back: approximation of Z using m components

Z_m :

 2.07888 0.174651 -0.071635 -0.775258 -0.626347 -0.780292
 1.71446 0.086269 0.380768 -0.963551 -0.332206 -0.885745
 1.5227 -0.668344 1.01716 -0.655983 -0.547852 -0.667686
 0.493614 -0.96426 0.591966 0.37544 -1.11637 0.619605
 -0.697478 1.22621 -0.048898 -1.01962 0.565591 -0.025804
 -0.848527 1.64081 -0.723638 -0.816046 0.439693 0.307706
 -2.06482 0.498213 0.281373 -0.085165 0.197241 1.17316
 1.79752 0.206749 0.460182 -1.19065 -0.156709 -1.1171
 0.564348 -0.052595 1.54051 -1.47003 0.518869 -1.10109
 0.356302 1.7779 -1.0353 -1.07926 0.258349 -0.277995
 -0.906525 -0.435866 0.058221 0.738839 -0.950721 1.49605
 1.159 -0.572484 -0.363991 0.587204 -1.53315 0.723424
 1.36786 0.803138 -0.044241 -1.24286 0.044651 -0.928543
 -1.5647 1.09071 0.463404 -1.04655 0.889414 0.167724
 1.57957 1.20287 -0.941433 -0.943914 -0.292629 -0.604466
 1.18078 0.232263 0.513168 -1.064 -0.05575 -0.806453
 1.04075 0.430108 0.454597 -1.17881 0.097411 -0.844052
 -0.869271 -0.58965 1.06462 -0.002578 -0.301647 0.69853
 -0.914249 -0.272724 1.83732 -1.01514 0.62294 -0.258153
 -0.531919 -0.309566 1.86596 -1.1257 0.588847 -0.487618

[27] // 5) check quality of approximation

[28] chk = (Z-Z_m)   // residual matrix: elementwise differences between the data Z and its approximation Z_m

chk :

 -0.160497 0.065431 -0.047067 0.663527 0.096457 -0.617852
 0.025619 0.20103 0.204507 -0.33229 -0.346377 0.247512
 0.173211 -0.788169 -0.61763 0.052032 1.01896 0.161601
 0.181826 0.031996 0.152178 -0.865306 -0.275369 0.774676
 0.325647 -0.707577 -0.439963 -0.730968 0.703943 0.848918
 -0.08908 0.405594 0.317871 -0.027025 -0.524425 -0.082934
 0.078364 -0.452231 -0.368526 0.125927 0.610694 0.005771
 -0.116401 -0.021729 -0.098581 0.555285 0.178235 -0.496809
 -0.035445 -0.180865 -0.192336 0.355616 0.326992 -0.273962
 0.103245 -0.20519 -0.121655 -0.252244 0.193219 0.282625
 -0.24191 -0.133022 -0.286724 1.24808 0.507934 -1.09435
 -0.010818 0.257197 0.232306 -0.225875 -0.389137 0.136328
 -0.122069 0.43641 0.324375 0.090765 -0.531781 -0.1977
 -0.116386 0.234784 0.140379 0.280624 -0.223256 -0.316146
 -0.041903 0.010099 -0.018794 0.180711 0.036114 -0.166227
 0.317645 -0.708631 -0.446331 -0.693264 0.715508 0.815073
 -0.16001 0.680457 0.526177 0.002935 -0.866731 -0.182828
 0.033299 0.470371 0.460576 -0.655716 -0.777443 0.468913
 -0.07836 0.201622 0.135079 0.142324 -0.218469 -0.182197
 -0.065978 0.202424 0.14416 0.084869 -0.235067 -0.130409

[29] err = (sqrt(sqsum(chk))) // overall error: square root of the sum of all squared residuals (Frobenius norm of chk)

err :

4.521591