function [evals,evecs,efuns,edata]=pca(D,meantrend,varnorm,k_rank,method,reportflag) %PCA - Principal Component Analysis of data matrix % PCA performs principal component (EOF) analysis on input % time series, using either the SVD method or the COVARIANCE % MATRIX method, depending on the size of the problem. See Emery % and Thompson or Preisendorfer for details. % % Input: D - a matrix of observation time series, one series % per column. In the case that the columns represent % different physical quantities, the varnorm flag below % can be used to normlize the column variance, by % dividing each series by it's demeaned std. % meantrend - binary flag (0|1) specifying whether (or not) % to remove mean and linear trends from the column % data; the default is to remove both mean and % linear trend. % varnorm - binary flag (0|1) specifying whether (or not) % the code should perform variance normaliation % of the observation matrix D columns. The default % is NOT to normalize the variance. % k_rank - number of eigenvalues/vectors/functions to return. % If k_rank>rank(D), then this option does nothing. % If k_rank==0, then all eig's are returned; this is % the default. Inputting k_rank < rank(D) amounts % to returning the rank-k approximation to the data % matrix. k_rank can also be the string 'SIG', which % tells the routine to determine the number of % statistically signifigant modes and return that % k_rank approximation. Specifying k_rank to anything % other than 0 forces the method to be 'SVD'. % method - override the method selection; either 'COV','SVD' % or 'SEL' to let the code select the method % report - binary flag (0|1) to write a "report" to stdout % of the "success" of the analysis. Default is no report. % % Output: % evals - vector of eigenvalues of the covariance matrix (D'*D) % evecs - matrix of eigenvectors of the covariance matrix % efuns - matrix of eigenfunctions (amplitude functions, e.g.) % edata - k_rank approximation to the data matrix; this matrix % will only be returned if 0 < k_rank < rank(D). % % NOTES: 1) Use COMP_VAR_EL_PARAMS to rotate vector series onto principal % axes if data resprsents vector series. % % Call as: [evals,evecs,efuns]=pca(D); % OR: [evals,evecs,efuns]=pca(D,meantrend); % OR: [evals,evecs,efuns]=pca(D,meantrend,varnorm); % OR: [evals,evecs,efuns,edata]=pca(D,meantrend,varnorm,k_rank); % OR: [evals,evecs,efuns,edata]=pca(D,meantrend,varnorm,k_rank,method); % OR: [evals,evecs,efuns,edata]=pca(D,meantrend,varnorm,k_rank,method,report); % if nargin==0 disp(' Call as: [evals,evecs,efuns]=pca(D);'); disp(' [evals,evecs,efuns]=pca(D,meantrend);'); disp(' [evals,evecs,efuns]=pca(D,meantrend,varnorm);'); disp(' [evals,evecs,efuns,edata]=pca(D,meantrend,varnorm,k_rank);'); disp(' [evals,evecs,efuns,edata]=pca(D,meantrend,varnorm,k_rank,method);'); disp(' [evals,evecs,efuns,edata]=pca(D,meantrend,varnorm,k_rank,method,report);'); evals=[];evecs=[];efuns=[]; return end %% PROCESS INPUT ARGUMENTS if isstr(D)&strcmp(lower(D),'refs') disp('Preisendorfer, PCA in Meteorology and Oceanography, Elsevier, 1988.'); disp('Overland and Preisendorfer, Signifigance test for PCA, Monthly Weather Review, 1982.'); disp('Kelly, JPO, 1988'); disp('Emery and Thompson, Data Analysis Methods in Physical Oceanography, 1998.') evals=[];evecs=[];efuns=[];edata=[]; return end if exist('meantrend') if meantrend~=0 & meantrend~=1 error('MEANTREND flag to PCA must be 0|1.') end else meantrend=1; end if exist('varnorm') if varnorm~=0 & varnorm~=1 error('VARNORM flag to PCA must be 0|1.') end else varnorm=0; end sig_test='no'; if exist('k_rank') if isstr(k_rank) if ~strcmp(lower(k_rank),'sig') error('if K_RANK to PCA is a string, it must be the string ''SIG''.') end sig_test='yes'; end if k_rank<0 error('K_RANK to PCA >=0 or the string ''SIG''.') end else k_rank=0; end if exist('method') if ~isstr(method)|((~strcmp(method,'COV'))&... (~strcmp(method,'SVD'))&... (~strcmp(method,'SEL'))) error('METHOD to PCA must be ''COV'' or ''SVD'' or ''SEL''') end else method='SEL'; end if isstr(k_rank),method='SVD';,end % Check the number of outgoing arguments. if nargout<4 error('Rank-k data matrix requested, but <4 output arguments provided.') end if exist('reportflag') if reportflag~=0 & reportflag~=1 error('REPORT flag to PCA must be 0|1.') end else reportflag=0; end % Get number of series (spatial points, variables, etc.) (M, % column count) and length of observations (N, row count). % Recall that the observation matrix is input with time % proceeding down the rows, and location (variables) across the % columns [M,N]=size(D'); % NOTE THE TRANSPOSE !!! % The relative sizes of M and N determine whether we compute % the EOFs from the covariance matrix or whether we use the % SVD method, unless overridden by user on input. if strcmp(method,'SEL') if M>N method='SVD'; else method='COV'; end end % Regardless of the method, we form a "data" matrix DT, which is % de-meaned and linearly de-trended, if the user has specified % meantrend==1. DM=D; if meantrend==1 % Demean data matrix D if reportflag disp(' ') disp(['Before: Mean = ',num2str(mean(D))]) disp(['De-meaning, De-trending data']) end [DM,MEAN]=demean(DM); % Some de-trending needs to happen here; atleast linear removal % Compute linear trend for each variable [DM,TRENDCOEFFS]=detrend2(DM); end % So far, observations have been de-meaned and linearly % de-trended. If the observation series % represent different physical quantities (temp and sal, for % example), then the user should have specified varmorm==1, so ... if varnorm STD=std(DM); if reportflag disp(' ') disp(['Normalizing variance.']) disp(['Before: Min STD = ',num2str(min(STD))]) disp([' Max STD = ',num2str(max(STD))]) end [DM,STDD]=normvar(DM); if reportflag STD=std(DM); disp(['After : Min STD = ',num2str(min(STD))]) disp([' Max STD = ',num2str(max(STD))]) end end % At this point, we transpose the data matrix to put the % time along the columns; DM=DM'; switch method case 'COV' % Form DM times DM-transpose, DDT DDT=(1/(N-1))*DM*DM';% This is the covariance matrix flops(0);tic; [U,d]=eig(DDT); FLOPS=flops; TOC=toc; [evals,iperm]=sort(diag(d)); iperm=flipud(iperm); % Flip to get descending order evals=flipud(evals); evecs=U(:,iperm); % Permute the columns of U efuns=DM'*evecs; edata=[]; % Rank-k data cannot be returned if method=='COV' case 'SVD' flops(0);tic; if reportflag,disp('Performing SVD...'),end [U,S,V]=svd(DM); FLOPS=flops; TOC=toc; % now, we compute the eigenvalues from the singular values; % they are the squares of the SVs, scaled by the length of % the time series -1, as in the covariance, matrix. evals=(1/(N-1))*diag(S).^2; % evecs are in U evecs=U; % Eigenfunctions efuns=V*S'; DMrank=length(diag(S)>eps); % Rank-k Approximation to the input data matrix if isstr(k_rank) % then it must be 'SIG', and determine the % statistically signifigant singular values. if reportflag,disp('Determining signifigance...'),end % set k_rank according to the following signifigance test. % The sig. test is based on a Monte Carlo simulation. % See Overland and Preisendorfer, Monthly Weather Review, % Jan. '82, Vol. 110, No. 1 for details, and notation. % Form the normalized eigenvalue statistic: T=evals/sum(evals); % eqn 1 % Form 100 replicates of a length DMrank by % M gaussian random noise matrix for r=1:100 F=randn(DMrank,M); delta(r,:)=(eig(F*F'))'; end for r=1:100 UU(r,:)=delta(r,:)/sum(delta(r,:)); end for j=1:M UU(:,j)=sort(UU(:,j)); end ik=find(T>UU(95,:)'); k_rank=ik(length(ik)); end if k_rank>0 & (k_rank<=min(M,N)) %return the k_rank approximation to the input data matrix. edata=(U(:,1:k_rank)*S(1:k_rank,1:k_rank)*V(:,1:k_rank)')'; if varnorm % add back in the std edata=edata.*(ones(size(edata(:,1)))*STDD); end if meantrend % add back in the means and trends tt=(1:length(edata(:,1)))';tt=tt-mean(tt); edata=edata+tt*TRENDCOEFFS; edata=edata+ones(size(edata(:,1)))*MEAN; end else % k_rank=0 edata=[]; end end if nargout==1 | nargout==0 efuns=[];evecs=[]; elseif nargout==2 efuns=[]; elseif nargout==3 edata=[]; end if reportflag disp(' ') disp(['Method = ' upper(method)]); disp(['Flops = ' int2str(FLOPS)]); disp(['O(MNN) = ' int2str(M*N*N)]); disp(['Time = ' num2str(TOC)]); disp(['Matrix Rank = ' num2str(DMrank)]); disp(' ') if strcmp(sig_test,'yes') disp(['Signifigant Rank (95%-conf level) = ' int2str(k_rank)]) end if k_rank>0 disp(['Rank-' int2str(k_rank) ' data matrix approximation returned in 4th output argument.']) disp(' ') end maxerr=max(max(abs(evecs*evecs')-eye(size(evecs)))); disp(['Max error in orthogonality of eigenvectors: ' num2str(maxerr)]) disp(' ') disp(' Variance Table') if strcmp(sig_test,'yes') disp(' (** indicates signifigance)') end disp(' Mode E-val %Var ') nr=min(M,N); pvar=cumsum(evals)/sum(evals)*100; for i=1:nr if i<=k_rank & strcmp(sig_test,'yes') fprintf(' ** %d %.3f %6.2f\n',[ i evals(i) pvar(i)]) else fprintf(' %d %.3f %6.2f\n',[ i evals(i) pvar(i)]) end end fprintf('Total %.3f\n',sum(evals(1:nr))) end % % Brian O. Blanton % Department of Marine Sciences % Ocean Processes Numerical Modeling Laboratory % 12-7 Venable Hall % CB# 3300 % University of North Carolina % Chapel Hill, NC % 27599-3300 % % 919-962-4466 % blanton@marine.unc.edu % % Summer 1999 %