Confidence-interval for correlation

.

## -*- texinfo -*-
## @deftypefn {Function File} {[R , LB , UB]} = corri (@var{x} [,alpha])
## @deftypefnx {Function File} {[R , LB , UB]} = corri (@var{x}, @var{y} [,alpha])

## Compute matrix of correlation coefficients.
##

## If alpha is given compute the confidence-interval of the correlation coefficients.
##
## Alpha must be a scalar in the range 0 < alpha < 1. The distribution of the correlation coefficients is first standardized with a fisher-transformation. The interval is computed by adding/subtracting the corresponding value of the inverted normal distribution. Finally the bounds are retransformed with the inverted fisher-transformation.

##
## If each row of @var{x} and @var{y} is an observation and each column is
## a variable, then the @w{(@var{i}, @var{j})-th} entry of

## @code{R = corri (@var{x}, @var{y})} is the correlation between the
## @var{i}-th variable in @var{x} and the @var{j}-th variable in @var{y}.
##
## R is the matrix of the correlation coefficients, LB and UB are the lower and upper bounds of the correlation-interval.
##
## @tex
## $$
## {\rm R = corr}(x,y) = {{\rm cov}(x,y) \over {\rm std}(x) {\rm std}(y)}
## $$
## $$
## {\rm LB} = Finv ( F({\rm corr}(x,y)) - ( norminv(1-((1-ralpha)/2)) ./ sqrt(n-3) ) )
## $$
## $$
## {\rm UB} = Finv ( F({\rm corr}(x,y)) + ( norminv(1-((1-ralpha)/2)) ./ sqrt(n-3) ) )
## $$

## @end tex
## @ifnottex
##
## @example

## R = corr (x,y) = cov (x,y) / (std (x) * std (y))
## LB = Finv ( F(corr(x,y)) - ( norminv(1-((1-ralpha)/2)) / sqrt(n-3) ) )
## UB = Finv ( F(corr(x,y)) + ( norminv(1-((1-ralpha)/2)) / sqrt(n-3) ) )

## @end example
##
## @end ifnottex

## If called with one vector or matrix, compute @code{corr (@var{x}, @var{x})},
## the correlation between the columns of @var{x}.

## @seealso{cov}
## @end deftypefn

## Author: Kurt Hornik <address@hidden>
## Created: March 1993
## Adapted-By: jwe

## Adapted-By: Stefan Neumann <address@hidden>
## May 2012 : added confidence intervals

function [ retval , lb , ub ] = corri (x, y, alpha = [])

if (nargin < 1 || nargin > 3)
print_usage ();
endif

## Input validation is done by cov.m. Don't repeat tests here

## Special case, scalar is always 100% correlated with itself
if (isscalar (x))
    if (isa (x, 'single'))
      retval = single (1);
    else
      retval = 1;
    endif

return;
endif

## No check for division by zero error, which happens only when
## there is a constant vector and should be rare.

ralpha = -999 ; # ralpha stores alpha from the function call. If it is still -999 after parsing the arguments the function call was invalid.

# --- case 2: 2 matrices , no alpha --- #
if (nargin == 2 && ! isscalar(y))

    c = cov (x, y);
    s = std (x)' * std (y);
    retval = c ./ s;

ralpha = -999 ; # -999 -> do not compute confidence-interval
end

# --- case 3: 2 matrices plus alpha --- #
if (nargin == 3 && isscalar(alpha))

    c = cov (x, y);
    s = std (x)' * std (y);
    retval = c ./ s;

ralpha = alpha ;
end

# --- case 4: 1 matrix plus alpha --- #
if (nargin == 2 && isscalar(y))

    c = cov (x);
    s = sqrt (diag (c));
    retval = c ./ (s * s');

ralpha = y ;
endif

# --- case 5: 1 matrix , no alpha --- #
if (nargin == 1)

    c = cov (x);
    s = sqrt (diag (c));
    retval = c ./ (s * s');

    ralpha = -999 ; # -999 -> do not compute confidence-interval
endif

if (ralpha!=-999 && (ralpha <= 0 || ralpha >=1))
    error "Error, alpha must be in the range of 0 < alpha < 1"
end
if (ralpha!=-999 && !isscalar(ralpha))
    error "Error, alpha must be a scalar"
end

# --- calculate the confidence interval --- #
clear alpha
if (ralpha != -999)
    # --- calculate interval only for -1<r<1, otherwise assume width=0 --- #
    bassume = ones(size(retval)) ;
    rint = retval ;
    bassume(rint <= -1) = 0 ;
    bassume(rint >= 1) = 0 ;
    rint(rint <= -1) = 0 ;
    rint(rint >= 1) = 0 ;

    Z      = F___(rint) ;

# width of the the confidence-interval for Z

n = rows(x) ;
DZ = norminv(1-((1-ralpha)/2)) ;

DZ = DZ ./ sqrt(n-3) ;
# calculate lower and upper bound of interval, retransform to original distribution

    lb     = Z - DZ ; lb = Finv___(lb) ; lb = bassume .* lb + (1-bassume) .* retval ;
    ub     = Z + DZ ; ub = Finv___(ub) ; ub = bassume .* ub + (1-bassume) .* retval ;
end

endfunction

# F(r) returns 0 for any r<-1 or r>1
function F___    = F___(r)      ; F___    = ((r>-1).*(r<=1)) .* log( (1+r)./(1-r) ) / 2 ; end

function Finv___ = Finv___(z) ; Finv___ = ( ( exp(2*z) - 1 ) ./ ( exp(2*z) + 1 ) ) ; end

%!test
%! x = rand (10);
%! cc1 = corr (x);
%! cc2 = corr (x, x);
%! assert (size (cc1) == [10, 10] && size (cc2) == [10, 10]);
%! assert (cc1, cc2, sqrt (eps));

%!test
%! x = [1:3]';
%! y = [3:-1:1]';
%! assert (corr (x,y), -1, 5*eps)
%! assert (corr (x,flipud (y)), 1, 5*eps)
%! assert (corr ([x, y]), [1 -1; -1 1], 5*eps)

%!test
%! x = single ([1:3]');
%! y = single ([3:-1:1]');
%! assert (corr (x,y), single (-1), 5*eps)
%! assert (corr (x,flipud (y)), single (1), 5*eps)
%! assert (corr ([x, y]), single ([1 -1; -1 1]), 5*eps)

%!assert (corr (5), 1);
%!assert (corr (single(5)), single(1));

%% Test input validation
%!error corr ();
%!error corr (1, 2, 3);
%!error corr ([1; 2], ["A", "B"]);
%!error corr (ones (2,2,2));
%!error corr (ones (2,2), ones (2,2,2));

From:	stn
Subject:	Confidence-interval for correlation
Date:	Thu, 31 May 2012 09:09:16 +0200