MO 111008 Rowwise PCA

;===========================================

;MatMate-Listing vom:08.10.2011 12:50:26

;============================================

[1] // MO-Problem:

[2] // Express N rowvectors optimally by m rowvector components (least squares)

[3] // Proposed solution: use PCA on rows of datamatrix

// referring to

// http://mathoverflow.net/questions/77465/hyperplane-least-square-through-points

[4] // -------------------------------------------------------------

[5] // 1) generate random-data

// Problem dimensions and test data for the rowwise-PCA demonstration.
[6] n=6 // number of columns of the data matrix (length of each rowvector)

[7] v=20 // number of rowvectors; this is big-N in the problem description

[8] m=3 // number of principal components kept for the approximation

// fixed start value so that the listing can be reproduced
// NOTE(review): presumably this seeds the random generator - confirm in the MatMate documentation
[9] set randomstart=41

[10] Z = randomn(v,n) // generate a v x n matrix of normally distributed random data

// In the printed Z the rows sum to 0 and have sum of squares n (checked on the
// first rows), i.e. the standardization uses the divisor n.
[11] Z = zvaluezl(abwzl(Z)) // center and standardize Z rowwise

Z :

1.918384	0.240082	-0.118702	-0.111731	-0.529889	-1.398144
1.740083	0.287299	0.585276	-1.295842	-0.678583	-0.638233
1.695915	-1.456513	0.399531	-0.603951	0.471103	-0.506085
0.675440	-0.932264	0.744144	-0.489866	-1.391735	1.394282
-0.371830	0.518629	-0.488861	-1.750585	1.269534	0.823114
-0.937607	2.046405	-0.405767	-0.843071	-0.084732	0.224772
-1.986454	0.045982	-0.087153	0.040762	0.807935	1.178928
1.681121	0.185020	0.361601	-0.635362	0.021527	-1.613907
0.528903	-0.233460	1.348171	-1.114420	0.845862	-1.375056
0.459548	1.572714	-1.156951	-1.331509	0.451568	0.004630
-1.148435	-0.568889	-0.228503	1.986914	-0.442786	0.401700
1.148182	-0.315287	-0.131685	0.361329	-1.922290	0.859752
1.245791	1.239548	0.280134	-1.152099	-0.487130	-1.126243
-1.681088	1.325498	0.603783	-0.765929	0.666159	-0.148422
1.537670	1.212968	-0.960227	-0.763202	-0.256514	-0.770693
1.498421	-0.476369	0.066837	-1.757267	0.659758	0.008620
0.880739	1.110565	0.980774	-1.175878	-0.769320	-1.026881
-0.835973	-0.119279	1.525193	-0.658294	-1.079090	1.167443
-0.992609	-0.071102	1.972401	-0.872811	0.404471	-0.440350
-0.597897	-0.107142	2.010120	-1.040835	0.353781	-0.618027

[12] //-------------------------------------------------------

[13] // 2) find principal components of rowvectors;

[14] // note: because the rowvectors were centered initially (each row sums to zero),

[15] // there are at most n-1 independent components!

[16] // recenter the datamatrix columnwise

[17] ME = meansp(Z) // rowvector (1 x n) holding the mean of each column of Z

ME :

0.322915	0.275220	0.365006	-0.698682	-0.084519	-0.179940

// the rowvector ME is subtracted from every row of Z (see the printed C below)
[18] C = Z - ME // recentered data, column-means = 0

C :

1.595469	-0.035138	-0.483707	0.586952	-0.445371	-1.218204
1.417168	0.012079	0.220270	-0.597159	-0.594064	-0.458293
1.373000	-1.731733	0.034525	0.094731	0.555622	-0.326145
0.352524	-1.207485	0.379138	0.208816	-1.307216	1.574222
-0.694746	0.243409	-0.853866	-1.051903	1.354052	1.003054
-1.260522	1.771185	-0.770773	-0.144388	-0.000213	0.404712
-2.309369	-0.229238	-0.452159	0.739445	0.892454	1.358868
1.358206	-0.090200	-0.003405	0.063320	0.106045	-1.433967
0.205988	-0.508680	0.983165	-0.415737	0.930380	-1.195116
0.136633	1.297494	-1.521956	-0.632827	0.536087	0.184570
-1.471350	-0.844109	-0.593509	2.685596	-0.358268	0.581640
0.825266	-0.590507	-0.496691	1.060012	-1.837772	1.039692
0.922876	0.964327	-0.084872	-0.453417	-0.402611	-0.946303
-2.004003	1.050277	0.238777	-0.067247	0.750678	0.031518
1.214754	0.937747	-1.325233	-0.064520	-0.171996	-0.590753
1.175506	-0.751589	-0.298169	-1.058584	0.744276	0.188560
0.557824	0.835345	0.615769	-0.477195	-0.684801	-0.846941
-1.158888	-0.394499	1.160187	0.040388	-0.994571	1.347383
-1.315524	-0.346322	1.607395	-0.174129	0.488990	-0.260410
-0.920812	-0.382362	1.645115	-0.342153	0.438299	-0.438087

[19] // get the required rotation-matrix T first

// NOTE(review): gettrans(...,"pca") presumably returns the rotation to principal-axes
// position for C'; its transpose T is applied from the left to the v x n matrix C
// - confirm against the MatMate documentation.
[20] T = gettrans(C',"pca")'

// In the printed PC_n the rows are ordered by decreasing sum of squares, and
// rows n..v are zero up to rounding (hence the signed zeros in the output).
[21] PC_n = T * C // the first n-1 rows contain the principal components

PC_n :

5.388984	-0.015400	-0.580167	-1.225378	-0.773942	-2.794096
-1.002772	3.012910	-0.668002	-2.315867	2.448157	-1.474426
0.411061	1.740197	-3.471250	1.081722	-1.094489	1.332760
-0.611711	1.684277	1.157263	0.992924	-1.878140	-1.344614
-0.296343	-0.610486	-0.768131	2.007980	1.322664	-1.655684
0.000000	0.000000	-0.000000	0.000000	-0.000000	0.000000
-0.000000	0.000000	0.000000	0.000000	-0.000000	-0.000000
-0.000000	-0.000000	-0.000000	0.000000	0.000000	-0.000000
-0.000000	-0.000000	0.000000	0.000000	-0.000000	0.000000
0.000000	-0.000000	-0.000000	-0.000000	-0.000000	-0.000000
0.000000	0.000000	-0.000000	0.000000	0.000000	0.000000
0.000000	0.000000	0.000000	0.000000	-0.000000	-0.000000
0.000000	0.000000	0.000000	-0.000000	0.000000	-0.000000
0.000000	-0.000000	-0.000000	-0.000000	0.000000	-0.000000
-0.000000	0.000000	-0.000000	-0.000000	0.000000	0.000000
0.000000	-0.000000	0.000000	-0.000000	0.000000	0.000000
0.000000	-0.000000	-0.000000	0.000000	-0.000000	0.000000
-0.000000	0.000000	-0.000000	0.000000	-0.000000	-0.000000
0.000000	-0.000000	0.000000	-0.000000	0.000000	-0.000000
-0.000000	-0.000000	0.000000	0.000000	0.000000	0.000000

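[22] // 3) now build a new matrix PC_m from PC_n in which all rowvectors with index k>m are set to zero

// in the PC_n table above, the rowvectors that are set to zero (index k>m) were shaded in the original listing; the shading is not visible in plain text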

// keep rows 1..m of PC_n and fill up with a (v-m) x n zero block
// NOTE(review): { A, B } presumably stacks the two blocks vertically; the v x n
// result C_m printed below is consistent with that - confirm.
[23] PC_m = { PC_n[1..m,*], Null(v-m,n) } // only the first m (=3) rowvectors are nonzero

[24] // 4) reverse the rotation, using only the m retained components

// T' reverses the rotation T; C_m approximates the column-centered data C
[25] C_m = T' * PC_m

C_m :

1.755965	-0.100569	-0.436641	-0.076575	-0.541828	-0.600352
1.391549	-0.188951	0.015763	-0.264869	-0.247687	-0.705805
1.199789	-0.943564	0.652155	0.042700	-0.463334	-0.487745
0.170699	-1.239481	0.226960	1.074123	-1.031846	0.799545
-1.020393	0.950986	-0.413904	-0.320935	0.650110	0.154136
-1.171442	1.365591	-1.088644	-0.117363	0.524211	0.487646
-2.387733	0.222993	-0.083633	0.613518	0.281759	1.353096
1.474607	-0.068471	0.095177	-0.491965	-0.072190	-0.937157
0.241433	-0.327815	1.175502	-0.771353	0.603388	-0.921155
0.033387	1.502684	-1.400302	-0.380582	0.342868	-0.098055
-1.229440	-0.711087	-0.306785	1.437521	-0.866202	1.675993
0.836085	-0.847704	-0.728997	1.285887	-1.448635	0.903364
1.044945	0.527917	-0.409247	-0.544182	0.129170	-0.748603
-1.887618	0.815493	0.098398	-0.347871	0.973933	0.347664
1.256657	0.927649	-1.306439	-0.245231	-0.208110	-0.424526
0.857861	-0.042958	0.148162	-0.365320	0.028768	-0.626513
0.717834	0.154887	0.089592	-0.480130	0.181929	-0.664112
-1.192187	-0.864870	0.699611	0.696104	-0.217129	0.878470
-1.237164	-0.547944	1.472316	-0.316453	0.707459	-0.078213
-0.854834	-0.584786	1.500954	-0.427022	0.673366	-0.307678

// add the column means ME back (they were removed in C = Z - ME) to get the approximation of Z
[26] Z_m = C_m + ME

Z_m :

2.078880	0.174651	-0.071635	-0.775258	-0.626347	-0.780292
1.714464	0.086269	0.380768	-0.963551	-0.332206	-0.885745
1.522704	-0.668344	1.017161	-0.655983	-0.547852	-0.667686
0.493614	-0.964260	0.591966	0.375440	-1.116365	0.619605
-0.697478	1.226206	-0.048898	-1.019617	0.565591	-0.025804
-0.848527	1.640812	-0.723638	-0.816046	0.439693	0.307706
-2.064818	0.498213	0.281373	-0.085165	0.197241	1.173156
1.797522	0.206749	0.460182	-1.190648	-0.156709	-1.117097
0.564348	-0.052595	1.540507	-1.470035	0.518869	-1.101095
0.356302	1.777904	-1.035296	-1.079264	0.258349	-0.277995
-0.906525	-0.435866	0.058221	0.738839	-0.950721	1.496052
1.159000	-0.572484	-0.363991	0.587204	-1.533153	0.723424
1.367860	0.803138	-0.044241	-1.242864	0.044651	-0.928543
-1.564703	1.090714	0.463404	-1.046553	0.889414	0.167724
1.579572	1.202869	-0.941433	-0.943914	-0.292629	-0.604466
1.180776	0.232263	0.513168	-1.064002	-0.055750	-0.806453
1.040749	0.430108	0.454597	-1.178813	0.097411	-0.844052
-0.869271	-0.589650	1.064617	-0.002578	-0.301647	0.698530
-0.914249	-0.272724	1.837322	-1.015136	0.622940	-0.258153
-0.531919	-0.309566	1.865960	-1.125705	0.588847	-0.487618

[27] // 5) check quality of approximation

// residuals: original data minus its m-component approximation
// (equal to C - C_m, because ME cancels)
[28] chk = (Z-Z_m) // Check differences

chk :

-0.160497	0.065431	-0.047067	0.663527	0.096457	-0.617852
0.025619	0.201030	0.204507	-0.332290	-0.346377	0.247512
0.173211	-0.788169	-0.617630	0.052032	1.018956	0.161601
0.181826	0.031996	0.152178	-0.865306	-0.275369	0.774676
0.325647	-0.707577	-0.439963	-0.730968	0.703943	0.848918
-0.089080	0.405594	0.317871	-0.027025	-0.524425	-0.082934
0.078364	-0.452231	-0.368526	0.125927	0.610694	0.005771
-0.116401	-0.021729	-0.098581	0.555285	0.178235	-0.496809
-0.035445	-0.180865	-0.192336	0.355616	0.326992	-0.273962
0.103245	-0.205190	-0.121655	-0.252244	0.193219	0.282625
-0.241910	-0.133022	-0.286724	1.248075	0.507934	-1.094353
-0.010818	0.257197	0.232306	-0.225875	-0.389137	0.136328
-0.122069	0.436410	0.324375	0.090765	-0.531781	-0.197700
-0.116386	0.234784	0.140379	0.280624	-0.223256	-0.316146
-0.041903	0.010099	-0.018794	0.180711	0.036114	-0.166227
0.317645	-0.708631	-0.446331	-0.693264	0.715508	0.815073
-0.160010	0.680457	0.526177	0.002935	-0.866731	-0.182828
0.033299	0.470371	0.460576	-0.655716	-0.777443	0.468913
-0.078360	0.201622	0.135079	0.142324	-0.218469	-0.182197
-0.065978	0.202424	0.144160	0.084869	-0.235067	-0.130409

// Single overall error figure: square root of the sum of squares of chk.
// NOTE(review): assumes sqsum sums the squared entries over the whole matrix - confirm.
// The printed value 4.521591 agrees with the root of the summed squares of the
// dropped rows m+1..n-1 (rows 4 and 5) of PC_n.
[29] err = (sqrt(sqsum(chk))) // check overall-error

err :

4.521591