;===========================================
;MatMate-Listing vom:08.10.2011 12:50:26
;============================================
[1] // MathOverflow (MO) problem:
[2] // Approximate N row vectors optimally (in the least-squares sense) using only m component row vectors
[3] // Proposed solution: apply PCA to the rows of the data matrix
// see
// http://mathoverflow.net/questions/77465/hyperplane-least-square-through-points
[4] // -------------------------------------------------------------
[5] // 1) generate random data: v row vectors with n entries each
[6] n=6 // number of columns, i.e. entries per row vector
[7] v=20 // number of row vectors; this is "N" in the problem description
[8] m=3 // number of components kept for the approximation
// NOTE(review): presumably fixes the random seed so that the listing is reproducible -- confirm
[9] set randomstart=41
[10] Z = randomn(v,n) // generate a v x n matrix of normally distributed random data
[11] Z = zvaluezl(abwzl(Z)) // center and standardize Z rowwise: in the output below each row has mean 0 and sum of squares n
Z :
1.918384 |
0.240082 |
-0.118702 |
-0.111731 |
-0.529889 |
-1.398144 |
1.740083 |
0.287299 |
0.585276 |
-1.295842 |
-0.678583 |
-0.638233 |
1.695915 |
-1.456513 |
0.399531 |
-0.603951 |
0.471103 |
-0.506085 |
0.675440 |
-0.932264 |
0.744144 |
-0.489866 |
-1.391735 |
1.394282 |
-0.371830 |
0.518629 |
-0.488861 |
-1.750585 |
1.269534 |
0.823114 |
-0.937607 |
2.046405 |
-0.405767 |
-0.843071 |
-0.084732 |
0.224772 |
-1.986454 |
0.045982 |
-0.087153 |
0.040762 |
0.807935 |
1.178928 |
1.681121 |
0.185020 |
0.361601 |
-0.635362 |
0.021527 |
-1.613907 |
0.528903 |
-0.233460 |
1.348171 |
-1.114420 |
0.845862 |
-1.375056 |
0.459548 |
1.572714 |
-1.156951 |
-1.331509 |
0.451568 |
0.004630 |
-1.148435 |
-0.568889 |
-0.228503 |
1.986914 |
-0.442786 |
0.401700 |
1.148182 |
-0.315287 |
-0.131685 |
0.361329 |
-1.922290 |
0.859752 |
1.245791 |
1.239548 |
0.280134 |
-1.152099 |
-0.487130 |
-1.126243 |
-1.681088 |
1.325498 |
0.603783 |
-0.765929 |
0.666159 |
-0.148422 |
1.537670 |
1.212968 |
-0.960227 |
-0.763202 |
-0.256514 |
-0.770693 |
1.498421 |
-0.476369 |
0.066837 |
-1.757267 |
0.659758 |
0.008620 |
0.880739 |
1.110565 |
0.980774 |
-1.175878 |
-0.769320 |
-1.026881 |
-0.835973 |
-0.119279 |
1.525193 |
-0.658294 |
-1.079090 |
1.167443 |
-0.992609 |
-0.071102 |
1.972401 |
-0.872811 |
0.404471 |
-0.440350 |
-0.597897 |
-0.107142 |
2.010120 |
-1.040835 |
0.353781 |
-0.618027 |
[12] //-------------------------------------------------------
[13] // 2) find the principal components of the row vectors;
[14] // note: because each row vector was centered initially, there are
[15] // at most n-1 independent components!
[16] // recenter the data matrix columnwise
[17] ME = meansp(Z) // row vector holding the n column means of Z
ME :
0.322915 |
0.275220 |
0.365006 |
-0.698682 |
-0.084519 |
-0.179940 |
[18] C = Z - ME // recentered data: ME is subtracted from every row of Z, so all column means are 0
C :
1.595469 |
-0.035138 |
-0.483707 |
0.586952 |
-0.445371 |
-1.218204 |
1.417168 |
0.012079 |
0.220270 |
-0.597159 |
-0.594064 |
-0.458293 |
1.373000 |
-1.731733 |
0.034525 |
0.094731 |
0.555622 |
-0.326145 |
0.352524 |
-1.207485 |
0.379138 |
0.208816 |
-1.307216 |
1.574222 |
-0.694746 |
0.243409 |
-0.853866 |
-1.051903 |
1.354052 |
1.003054 |
-1.260522 |
1.771185 |
-0.770773 |
-0.144388 |
-0.000213 |
0.404712 |
-2.309369 |
-0.229238 |
-0.452159 |
0.739445 |
0.892454 |
1.358868 |
1.358206 |
-0.090200 |
-0.003405 |
0.063320 |
0.106045 |
-1.433967 |
0.205988 |
-0.508680 |
0.983165 |
-0.415737 |
0.930380 |
-1.195116 |
0.136633 |
1.297494 |
-1.521956 |
-0.632827 |
0.536087 |
0.184570 |
-1.471350 |
-0.844109 |
-0.593509 |
2.685596 |
-0.358268 |
0.581640 |
0.825266 |
-0.590507 |
-0.496691 |
1.060012 |
-1.837772 |
1.039692 |
0.922876 |
0.964327 |
-0.084872 |
-0.453417 |
-0.402611 |
-0.946303 |
-2.004003 |
1.050277 |
0.238777 |
-0.067247 |
0.750678 |
0.031518 |
1.214754 |
0.937747 |
-1.325233 |
-0.064520 |
-0.171996 |
-0.590753 |
1.175506 |
-0.751589 |
-0.298169 |
-1.058584 |
0.744276 |
0.188560 |
0.557824 |
0.835345 |
0.615769 |
-0.477195 |
-0.684801 |
-0.846941 |
-1.158888 |
-0.394499 |
1.160187 |
0.040388 |
-0.994571 |
1.347383 |
-1.315524 |
-0.346322 |
1.607395 |
-0.174129 |
0.488990 |
-0.260410 |
-0.920812 |
-0.382362 |
1.645115 |
-0.342153 |
0.438299 |
-0.438087 |
[19] // get the required rotation matrix T first
// NOTE(review): ' appears to denote transposition, so T is arranged to multiply C from the left -- confirm
[20] T = gettrans(C',"pca")'
[21] PC_n = T * C // rotated data: only the first n-1 rows (the principal components) are nonzero, see the output below
PC_n :
5.388984 |
-0.015400 |
-0.580167 |
-1.225378 |
-0.773942 |
-2.794096 |
-1.002772 |
3.012910 |
-0.668002 |
-2.315867 |
2.448157 |
-1.474426 |
0.411061 |
1.740197 |
-3.471250 |
1.081722 |
-1.094489 |
1.332760 |
-0.611711 |
1.684277 |
1.157263 |
0.992924 |
-1.878140 |
-1.344614 |
-0.296343 |
-0.610486 |
-0.768131 |
2.007980 |
1.322664 |
-1.655684 |
0.000000 |
0.000000 |
-0.000000 |
0.000000 |
-0.000000 |
0.000000 |
-0.000000 |
0.000000 |
0.000000 |
0.000000 |
-0.000000 |
-0.000000 |
-0.000000 |
-0.000000 |
-0.000000 |
0.000000 |
0.000000 |
-0.000000 |
-0.000000 |
-0.000000 |
0.000000 |
0.000000 |
-0.000000 |
0.000000 |
0.000000 |
-0.000000 |
-0.000000 |
-0.000000 |
-0.000000 |
-0.000000 |
0.000000 |
0.000000 |
-0.000000 |
0.000000 |
0.000000 |
0.000000 |
0.000000 |
0.000000 |
0.000000 |
0.000000 |
-0.000000 |
-0.000000 |
0.000000 |
0.000000 |
0.000000 |
-0.000000 |
0.000000 |
-0.000000 |
0.000000 |
-0.000000 |
-0.000000 |
-0.000000 |
0.000000 |
-0.000000 |
-0.000000 |
0.000000 |
-0.000000 |
-0.000000 |
0.000000 |
0.000000 |
0.000000 |
-0.000000 |
0.000000 |
-0.000000 |
0.000000 |
0.000000 |
0.000000 |
-0.000000 |
-0.000000 |
0.000000 |
-0.000000 |
0.000000 |
-0.000000 |
0.000000 |
-0.000000 |
0.000000 |
-0.000000 |
-0.000000 |
0.000000 |
-0.000000 |
0.000000 |
-0.000000 |
0.000000 |
-0.000000 |
-0.000000 |
-0.000000 |
0.000000 |
0.000000 |
0.000000 |
0.000000 |
[22] // 3) build a new matrix PC_m from PC_n in which all row vectors with index k>m are set to zero
// (the original listing marked these rows by shading in the table above; no shading is visible in this plain-text version)
[23] PC_m = { PC_n[1..m,*], Null(v-m,n) } // only the first m (here 3) row vectors are nonzero
[24] // 4) reverse the rotation, using only the first m components
[25] C_m = T' * PC_m // approximation of the recentered data C built from m components
C_m :
1.755965 |
-0.100569 |
-0.436641 |
-0.076575 |
-0.541828 |
-0.600352 |
1.391549 |
-0.188951 |
0.015763 |
-0.264869 |
-0.247687 |
-0.705805 |
1.199789 |
-0.943564 |
0.652155 |
0.042700 |
-0.463334 |
-0.487745 |
0.170699 |
-1.239481 |
0.226960 |
1.074123 |
-1.031846 |
0.799545 |
-1.020393 |
0.950986 |
-0.413904 |
-0.320935 |
0.650110 |
0.154136 |
-1.171442 |
1.365591 |
-1.088644 |
-0.117363 |
0.524211 |
0.487646 |
-2.387733 |
0.222993 |
-0.083633 |
0.613518 |
0.281759 |
1.353096 |
1.474607 |
-0.068471 |
0.095177 |
-0.491965 |
-0.072190 |
-0.937157 |
0.241433 |
-0.327815 |
1.175502 |
-0.771353 |
0.603388 |
-0.921155 |
0.033387 |
1.502684 |
-1.400302 |
-0.380582 |
0.342868 |
-0.098055 |
-1.229440 |
-0.711087 |
-0.306785 |
1.437521 |
-0.866202 |
1.675993 |
0.836085 |
-0.847704 |
-0.728997 |
1.285887 |
-1.448635 |
0.903364 |
1.044945 |
0.527917 |
-0.409247 |
-0.544182 |
0.129170 |
-0.748603 |
-1.887618 |
0.815493 |
0.098398 |
-0.347871 |
0.973933 |
0.347664 |
1.256657 |
0.927649 |
-1.306439 |
-0.245231 |
-0.208110 |
-0.424526 |
0.857861 |
-0.042958 |
0.148162 |
-0.365320 |
0.028768 |
-0.626513 |
0.717834 |
0.154887 |
0.089592 |
-0.480130 |
0.181929 |
-0.664112 |
-1.192187 |
-0.864870 |
0.699611 |
0.696104 |
-0.217129 |
0.878470 |
-1.237164 |
-0.547944 |
1.472316 |
-0.316453 |
0.707459 |
-0.078213 |
-0.854834 |
-0.584786 |
1.500954 |
-0.427022 |
0.673366 |
-0.307678 |
[26] Z_m = C_m + ME // add the column means back to obtain the approximation of the original data Z
Z_m :
2.078880 |
0.174651 |
-0.071635 |
-0.775258 |
-0.626347 |
-0.780292 |
1.714464 |
0.086269 |
0.380768 |
-0.963551 |
-0.332206 |
-0.885745 |
1.522704 |
-0.668344 |
1.017161 |
-0.655983 |
-0.547852 |
-0.667686 |
0.493614 |
-0.964260 |
0.591966 |
0.375440 |
-1.116365 |
0.619605 |
-0.697478 |
1.226206 |
-0.048898 |
-1.019617 |
0.565591 |
-0.025804 |
-0.848527 |
1.640812 |
-0.723638 |
-0.816046 |
0.439693 |
0.307706 |
-2.064818 |
0.498213 |
0.281373 |
-0.085165 |
0.197241 |
1.173156 |
1.797522 |
0.206749 |
0.460182 |
-1.190648 |
-0.156709 |
-1.117097 |
0.564348 |
-0.052595 |
1.540507 |
-1.470035 |
0.518869 |
-1.101095 |
0.356302 |
1.777904 |
-1.035296 |
-1.079264 |
0.258349 |
-0.277995 |
-0.906525 |
-0.435866 |
0.058221 |
0.738839 |
-0.950721 |
1.496052 |
1.159000 |
-0.572484 |
-0.363991 |
0.587204 |
-1.533153 |
0.723424 |
1.367860 |
0.803138 |
-0.044241 |
-1.242864 |
0.044651 |
-0.928543 |
-1.564703 |
1.090714 |
0.463404 |
-1.046553 |
0.889414 |
0.167724 |
1.579572 |
1.202869 |
-0.941433 |
-0.943914 |
-0.292629 |
-0.604466 |
1.180776 |
0.232263 |
0.513168 |
-1.064002 |
-0.055750 |
-0.806453 |
1.040749 |
0.430108 |
0.454597 |
-1.178813 |
0.097411 |
-0.844052 |
-0.869271 |
-0.589650 |
1.064617 |
-0.002578 |
-0.301647 |
0.698530 |
-0.914249 |
-0.272724 |
1.837322 |
-1.015136 |
0.622940 |
-0.258153 |
-0.531919 |
-0.309566 |
1.865960 |
-1.125705 |
0.588847 |
-0.487618 |
[27] // 5) check the quality of the approximation
[28] chk = (Z-Z_m) // elementwise differences between the data Z and its approximation Z_m
chk :
-0.160497 |
0.065431 |
-0.047067 |
0.663527 |
0.096457 |
-0.617852 |
0.025619 |
0.201030 |
0.204507 |
-0.332290 |
-0.346377 |
0.247512 |
0.173211 |
-0.788169 |
-0.617630 |
0.052032 |
1.018956 |
0.161601 |
0.181826 |
0.031996 |
0.152178 |
-0.865306 |
-0.275369 |
0.774676 |
0.325647 |
-0.707577 |
-0.439963 |
-0.730968 |
0.703943 |
0.848918 |
-0.089080 |
0.405594 |
0.317871 |
-0.027025 |
-0.524425 |
-0.082934 |
0.078364 |
-0.452231 |
-0.368526 |
0.125927 |
0.610694 |
0.005771 |
-0.116401 |
-0.021729 |
-0.098581 |
0.555285 |
0.178235 |
-0.496809 |
-0.035445 |
-0.180865 |
-0.192336 |
0.355616 |
0.326992 |
-0.273962 |
0.103245 |
-0.205190 |
-0.121655 |
-0.252244 |
0.193219 |
0.282625 |
-0.241910 |
-0.133022 |
-0.286724 |
1.248075 |
0.507934 |
-1.094353 |
-0.010818 |
0.257197 |
0.232306 |
-0.225875 |
-0.389137 |
0.136328 |
-0.122069 |
0.436410 |
0.324375 |
0.090765 |
-0.531781 |
-0.197700 |
-0.116386 |
0.234784 |
0.140379 |
0.280624 |
-0.223256 |
-0.316146 |
-0.041903 |
0.010099 |
-0.018794 |
0.180711 |
0.036114 |
-0.166227 |
0.317645 |
-0.708631 |
-0.446331 |
-0.693264 |
0.715508 |
0.815073 |
-0.160010 |
0.680457 |
0.526177 |
0.002935 |
-0.866731 |
-0.182828 |
0.033299 |
0.470371 |
0.460576 |
-0.655716 |
-0.777443 |
0.468913 |
-0.078360 |
0.201622 |
0.135079 |
0.142324 |
-0.218469 |
-0.182197 |
-0.065978 |
0.202424 |
0.144160 |
0.084869 |
-0.235067 |
-0.130409 |
[29] err = (sqrt(sqsum(chk))) // overall error; presumably sqsum is the sum of squared entries, making err the Frobenius norm of chk -- TODO confirm
err :
4.521591