\documentclass[12pt]{article}
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{lscape}

\hbadness=10000
\textwidth=7.7in  \textheight=15in
\hoffset=-1.25in \voffset=-.75in
\pagestyle{empty}
\newcommand{\boxspace}{\parbox{1.42in}}
\newcommand{\hhspace}{\hspace{0.02in}}
%
\newcommand{\bboxspace}{\parbox{2.0in}}
\newcommand{\hhhspace}{\hspace{0.05in}}
\input{tcilatex}



\title{Factor Analysis}
\author{Jaime Frade}
\date{\today}

\begin{document}

\maketitle
\vspace{1in}

%\begin{tabular}{c c}
%\; & \hspace{.65cm} \pdfimage width 16cm {university-logo3.png}
%\end{tabular}
%\begin{sloppypar}
%\begin{abstract}
%\end{abstract}

%order
%\part{Headline of your part}
%\section{Headline of your section}
%\subsection{}
%\subsubsection{}
%\paragraph{}
%\subparagraph{}

\newpage
\part{9.4: Factor Rotation}
\section{Review:}


\subsection{Covariance Structure for the Orthogonal Factor Model}
\begin{enumerate}
\item Cov($\mathbf{X}) = \mathbf{LL^{t}} + \mathbf{\Psi}$

\begin{eqnarray}\label{eq:9_5}
	\mathrm{Var}(X_i) &=& l^2_{i1} + \cdots + l^2_{im} + \Psi_i \nonumber
	\\
	\;	
	\\
	\mathrm{Cov}(X_i, X_k) &=& l_{i1}l_{k1}+ \cdots + l_{im}l_{km} \nonumber
\end{eqnarray}

\item Cov($\mathbf{X, F}) = \mathbf{L} $
\\
or
 \begin{eqnarray}
	\mathrm{Cov}(X_i, F_j) &=& l_{ij}  \nonumber
\end{eqnarray}

\end{enumerate}


It has been shown (see \textbf{(9-8) pg 487}) that all factor loadings obtained from the initial loadings by an orthogonal transformation have the same ability to reproduce the covariance (or correlation) matrix.  An orthogonal transformation is a rotation or reflection of the coordinate axes.
\vspace{.25in}
\\
Factor analysis is a generic term for a family of statistical
techniques concerned with the reduction of a set of observable variables
in terms of a small number of latent factors. It has been developed primarily
for analyzing relationships among a number of measurable entities (such as survey items or test scores). The underlying assumption of factor analysis
is that there exists a number of unobserved latent variables
(or `factors') that account for the correlations among observed variables,
such that if the latent variables are partialled out or held constant,
the partial correlations among observed variables all become zero.
In other words, the latent factors determine the values of the observed variables.
\vspace{.25in}
\\
The primary purpose of factor analysis is data reduction and summarization
\newpage
\section{Definition:}
\vspace{.25in}
\textbf{Factor Rotation:} An orthogonal transformation of the \underline{factor loadings}, as well as the 
\\
implied orthogonal transformation of the \underline{factors}.
\\
Let $T_{m \times m}$ be an orthogonal matrix, 
\\
$(TT^{t} = T^{t}T = I_{m \times m})$.
\vspace{.5in}
\\
If $\widehat{L}$ is the $p \times m$ matrix of estimated factor loadings obtained by any method (PCP, ML, etc.), then
\begin{equation}\label{eq:9_42}
\widehat{L}^{\ast} = \widehat{L}T, \hspace{.5cm} \textrm{where}  \hspace{.15cm} TT^{t} = T^{t}T = I
\end{equation}
is the $p \times m$ matrix of `rotated' loadings.  $\widehat{L}^{\ast}$ is an orthogonal transformation of the factor loading matrix.
\\
The estimated covariance (or correlation) matrix remains unchanged.
\vspace{.5in}
\\
\begin{eqnarray}\label{eq:9_43}
\widehat{L} \widehat{L}^{t} + \widehat{\Psi} &=&  \widehat{L}TT^{t}\widehat{L}^{t} + \widehat{\Psi} \nonumber
\\
&=&  \widehat{L}^{\ast} (\widehat{L}^{\ast})^{t} + \widehat{\Psi}
\end{eqnarray}
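Equation (\ref{eq:9_43}) indicates that the residual matrix, $S_n - \widehat{L}\widehat{L}^{t} - \widehat{\Psi} = S_n - \widehat{L}^{\ast}(\widehat{L}^{\ast})^{t} - \widehat{\Psi}$, remains unchanged.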
\vspace{.5in}
\\
%\begin{equation}
%S_n -  \widehat{L}\widehat{L}^{t} - \widehat{\Psi}  = S_n - \widehat{L}^{\ast}\widehat{L}^{\ast}\, - \widehat{\Psi}\nonumber
%\end{equation}\vspace{.5in}
%\\
Also, the estimated specific variances, $\widehat{\Psi}_i$, and hence the communalities, $\widehat{h}^2_i$, are unchanged by orthogonal transformations of $\widehat{L}$.
\vspace{.5in}
\\

Therefore, we can still use either $\widehat{L}$ or $\widehat{L}^{\ast}$.


%=======================================================================================================
\newpage


The examples illustrate graphical and analytical methods for determining an orthogonal rotation to a simple structure.

\section{Examples}
\subsection{Example 9.8 (First look at factor rotation.) Oblique transformation.}
\vspace{.5in}
Presented with the sample correlation matrix of examination scores in $p=6$ subject areas for $n = 220$ male students.  The correlation matrix is

\[
R = 
\left[\begin{array}{cccccc}
1.0 & 0.439 & 0.410 & 0.288 & 0.329 & 0.248
\\
\; & 1.0 & 0.351 & 0.354 & 0.320 & 0.329
\\
\; & \; & 1.0 &   0.164 & 0.190 & 0.181
\\
\; & \; & \; & 1.0 & 0.595 & 0.470
\\
\; & \; & \; & \; & 1.0 & 0.464
\\
\; & \; & \; & \; & \; & 1.0
\end{array}\right]
\]




%=======================================================================================================
\newpage
\section{Varimax Criterion}

Define the `scaled' loading, $\widetilde{l}^\ast_{ij}$, of the $i$-th variable on the $j$-th factor \underline{after} rotation: the rotated coefficients scaled by the square root of the communalities.

\begin{equation}
\widetilde{l}^\ast_{ij} = \dfrac{\widehat{l}^\ast_{ij}}{\widehat{h}_{i}} \nonumber
\end{equation}

Then the varimax procedure selects the orthogonal transformation that maximizes

\begin{equation}\label{eq:9_45}
V = \dfrac{1}{p} \sum^{m}_{j=1} \left[\sum^p_{i=1}(\widetilde{l}^\ast_{ij})^4 - \dfrac{1}{p}\left(\sum^p_{i=1} \left(\widetilde{l}^\ast_{ij}\right)^2\right)^2\right]
\end{equation}

Interpretation of (\ref{eq:9_45}), 
\begin{equation}
V \varpropto \sum^m_{j=1} \left( \textrm{Variance of squares of \underline{scaled} loading for the j-th factor} \right)
\end{equation}

\textbf{Notes:}  Scaling gives the variables with smaller communalities more influence.  Overall, maximizing V (\ref{eq:9_45}) corresponds to `spreading out' the squares of the loadings on each factor as much as possible.  Therefore, we hope to find groups of large and negligible coefficients in any \textit{column} of the rotated loadings matrix, $\widetilde{l}^\ast_{ij}$.

\subsection{Example 9.9(2) (Rotated loadings for stock-price data.)}
Returning to the data discussed in example 9.3 \textbf{pg 493}.  The original factor loadings (obtained by PCP), the communalities, and the (varimax) rotated factor loadings are shown in Table (??) below.  SAS code given as well.

\htmladdnormallink[Table 1:]{http://www.stat.fsu.edu/\~{}jfrade/example9-10-1.htm} 

It is clear that the bank stocks (JP Morgan, Citibank, and Wells Fargo) load highly on the first factor, while the oil stocks (Shell and Mobil) load highly on the second factor.  Factor 1 may indicate those unique factors that cause bank stocks to move together (??), and factor 2 represents conditions affecting oil stocks.

\htmladdnormallink[Table 2:]{http://www.stat.fsu.edu/\~{}jfrade/example9-10.htm} 

The factor loadings for the variables are pictured in the figure below with respect to the original and (varimax) rotated factor axes.  After rotation, each of the $p$ variables should have a high loading on only one factor; however, this is not always possible.

%%insert table here
\htmladdnormallink[Figure 3:]{http://www.stat.fsu.edu/\~{}jfrade/example9-10-2.htm} 

The general market factor was destroyed by rotation.  Analyze the relationship to pattern of correlations.


%=======================================================================================================
\newpage
\part{9.5: Factor Scores}

Factor analysis centers the focus on the parameters in the factor model; however, the estimated values of the common factors, \textbf{factor scores}, may also be of interest.  These quantities are often used for diagnostic purposes, as well as inputs to a subsequent analysis.

Factor scores are estimates of values for the unobserved random factor vectors, $\mathbf{F}_{j}$, $j = 1, 2, \ldots , n.$

\begin{equation}
\mathbf{\widehat{f}_{j}} = \textrm{estimate of the values} \hspace{.15cm} \mathbf{f_j} \hspace{.15cm} \textrm{attained by} \hspace{.15cm} \mathbf{F_j} \hspace{.15cm} \textrm{ (\textit{j}th case) } \nonumber
\end{equation}

However, estimation is complicated by the fact that unobserved quantities, $\mathbf{f_j}$ and $\mathbf{\epsilon_j}$, outnumber the observed $\mathbf{x_j}$.  The following two methods, Weighted Least Squares and Regression Methods, describe ways to overcome this complication.


\section{Estimation of Factor Scores for both models}

Suppose that the mean vector, $\mathbf{\mu}$, the loading factors, $\mathbf{L}$, and the specific variance $\Psi$ are known for the factor model, 

\begin{equation}\label{eq:9_47a}
(\mathbf{X_j} - \mathbf{\mu})_{(p \times 1)} = \mathbf{L}_{(p \times m)}{\mathbf{F_j}}_{(m \times 1)} + {\mathbf{\epsilon_j}}_{(p \times 1)}
\end{equation}

Regard the specific factors $\epsilon' = \left[\epsilon_1, \epsilon_2, \ldots, \epsilon_p\right]$ as errors.  Since the variances Var($\epsilon_i$) = $\Psi_i$, $i = 1, 2, \ldots, p$, need not be equal, that is,
\begin{equation}
\textrm{Cov}(\epsilon) = \mathbf{\Psi} =
\left[ \begin{array}{c c c c c}
\Psi_{1} & 0 &  0 & \cdots & 0 \\
0 & \Psi_{2} & 0 & \cdots & 0 \\
0 & 0 &  \Psi_{3} & \cdots & 0 \\
\vdots & \vdots & \vdots  & \ddots & \vdots \\
0 & 0 &  0 & \cdots  & \Psi_{p} 
\end{array} \right],
\end{equation}
we will use weighted least squares to estimate the common factor values.

%===========================================================================================
\newpage

\section{Weighted Least Squares Method}

The WLS estimate $\widehat{\mathbf{f}}$ of $\mathbf{f}$ is 

\begin{eqnarray}\label{eq:9_48}
\widehat{\mathbf{f}}
& = & \left(\mathbf{L}'\mathbf{\Psi}^{-1}\mathbf{L}\right)^{-1}\mathbf{L}'\mathbf{\Psi}^{-1}\left(\mathbf{x} - \mathbf{\mu}\right)
\end{eqnarray}


Using (\ref{eq:9_48}), take estimates $\widehat{\mathbf{L}}, \widehat{\mathbf{\Psi}}, \& \widehat{\mathbf{\mu}} = \bar{\mathbf{x}}$ as true values to obtain the factor scores for the \textit{j}-th case:


\begin{eqnarray}\label{eq:9_49}
\widehat{\mathbf{f}}_j 
& = & \left(\widehat{\mathbf{L}}'\widehat{\mathbf{\Psi}}^{-1}\widehat{\mathbf{L}}\right)^{-1}\widehat{\mathbf{L}}'\widehat{\mathbf{\Psi}}^{-1}\left(\mathbf{x_j} - \bar{\mathbf{x}}\right)
\end{eqnarray}

In (\ref{eq:9_49}), the estimates $\widehat{\mathbf{L}}, \widehat{\mathbf{\Psi}}$ are obtained by the maximum likelihood method and must satisfy the uniqueness condition, $\widehat{\mathbf{L}}'\widehat{\mathbf{\Psi}}^{-1}\widehat{\mathbf{L}} = \widehat{\Delta}$, a diagonal matrix.

\textbf{Note:}  Ordinary (unweighted) least squares estimation is sometimes used when factor loadings are obtained from the principal component method, since specific variances tend to be more nearly equal (i.e.\ $\widehat{\Psi}_1 \approx \cdots \approx \widehat{\Psi}_p$).

The factor scores are then, 


\begin{eqnarray}
\widehat{\mathbf{f}}_j 
& = & \left(\widetilde{\mathbf{L}}'\widetilde{\mathbf{L}}\right)^{-1}\widetilde{\mathbf{L}}'\left(\mathbf{x_j} - \bar{\mathbf{x}}\right) \label{eq:9_49b1}
\\
& = & \left(\widetilde{\mathbf{L}_\mathbf{z}}'\widetilde{\mathbf{L}_\mathbf{z}}\right)^{-1}\widetilde{\mathbf{L}_\mathbf{z}}'\mathbf{z}_j \label{eq:9_49b2}
\end{eqnarray}

where, (\ref{eq:9_49b2}), is for the standardized data.  



Since (see (9-15), \textbf{pg. 490})
\begin{equation}\label{eq:9_51a}
\widetilde{\mathbf{L}} = 
\left[ \begin{array}{c|c|c|c}
\sqrt{\widehat{\lambda}_1}\, \widehat{\mathbf{e}}_1 &
\sqrt{\widehat{\lambda}_2}\, \widehat{\mathbf{e}}_2 &
\cdots &
\sqrt{\widehat{\lambda}_m}\, \widehat{\mathbf{e}}_m
\end{array} \right]
\end{equation}

Substituting (\ref{eq:9_51a}) into (\ref{eq:9_49b1}), we obtain

\begin{equation}\label{eq:9_51}
\widehat{\mathbf{f}}_j  = 
\left[ \begin{array}{c}
\dfrac{1}{\sqrt{\widehat{\lambda}_1}} \widehat{\mathbf{e}}_1'\left(\mathbf{x_j} - \bar{\mathbf{x}}\right)\\
\dfrac{1}{\sqrt{\widehat{\lambda}_2}} \widehat{\mathbf{e}}_2'\left(\mathbf{x_j} - \bar{\mathbf{x}}\right)\\
\vdots \\
\dfrac{1}{\sqrt{\widehat{\lambda}_m}} \widehat{\mathbf{e}}_m'\left(\mathbf{x_j} - \bar{\mathbf{x}}\right)
\end{array} \right]
\end{equation}

Recall from principal component analysis (see (8-21), \textbf{pg 443}) that the \textit{i}th principal component is $\widehat{y}_{i} = \widehat{\mathbf{e}}_i'(\mathbf{x} - \bar{\mathbf{x}})$ for any observation vector, $\mathbf{x}$.

Comparing (\ref{eq:9_51}) with above (8-21), $\widehat{\mathbf{f}}_j$ is the first $m$ (scaled) principal components evaluated at $\mathbf{x}_j$.

\newpage
%===========================================================================================
\section{Regression Method}

Starting from original factor model in (\ref{eq:9_47a}), treat the loadings matrix, $\mathbf{L}$ and $\mathbf{\Psi}$ as known values.

Consider the joint distribution of $(\mathbf{x}_j - \mu)$ and $\mathbf{F}_j$.  Assume the multivariate normality as in the maximum likelihood approach to factor analysis.  

\begin{equation}\label{eq:9_52a}
\left[ \begin{array}{c}
\mathbf{x}_j - \mathbf{\mu} \\
\mathbf{F}_j
\end{array} \right]
\sim 
N_{p+m}\left(\mathbf{0}, \mathbf{\Sigma}^{\ast} \right)
\end{equation}



\subsection{Example 9.12 (Computing Factor Scores by WLS and Regression.)}


%=======================================================================================================
\part{9.6: Perspectives and a Strategy for Factor Analysis}
\section{Suggested steps for factor analysis}
\begin{enumerate}
\item \textit{Perform PCP factor analysis}
\begin{description}
\item[a.] 
\item[b.]
\end{description}
\item \textit{Perform a maximum likelihood factor analysis, including a varimax rotation}
\item \textit{Compare the solutions obtained from the two factor analyses}
\begin{description}
\item[a.]
\item[b.]
\end{description}
\item \textit{Repeat the first three steps for other numbers of common factors \textbf{m}.}
\item \textit{For large data sets, split them in half and perform a factor analysis on each part.}
\end{enumerate}

\subsection{Example 9.14 (chicken-bone data)}
%\end{sloppypar}
\end{document}
