3191
|
1 ## Copyright (C) 1995, 1996, 1997 Kurt Hornik |
|
2 ## |
|
3 ## This program is free software; you can redistribute it and/or modify |
|
4 ## it under the terms of the GNU General Public License as published by |
|
5 ## the Free Software Foundation; either version 2, or (at your option) |
|
6 ## any later version. |
|
7 ## |
|
8 ## This program is distributed in the hope that it will be useful, but |
|
9 ## WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 ## General Public License for more details. |
|
12 ## |
|
13 ## You should have received a copy of the GNU General Public License |
|
14 ## along with this file. If not, write to the Free Software Foundation, |
|
15 ## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
|
16 |
|
17 ## Performs ordinal logistic regression. |
|
18 ## |
|
19 ## Suppose Y takes values in k ordered categories, and let gamma_i (x) |
|
20 ## be the cumulative probability that Y falls in one of the first i |
|
21 ## categories given the covariate x. Then |
|
22 ## [theta, beta] = |
|
23 ## logistic_regression (y, x) |
|
24 ## fits the model |
|
25 ## logit (gamma_i (x)) = theta_i - beta' * x, i = 1, ..., k-1. |
|
26 ## The number of ordinal categories, k, is taken to be the number of |
|
27 ## distinct values of round (y) . If k equals 2, y is binary and the |
|
28 ## model is ordinary logistic regression. X is assumed to have full |
|
29 ## column rank. |
|
30 ## |
|
31 ## theta = logistic_regression (y) |
|
32 ## fits the model with baseline logit odds only. |
|
33 ## |
|
34 ## The full form is |
|
35 ## [theta, beta, dev, dl, d2l, gamma] = |
|
36 ## logistic_regression (y, x, print, theta, beta) |
|
37 ## in which all output arguments and all input arguments except y are |
|
38 ## optional. |
|
39 ## |
|
40 ## print = 1 requests summary information about the fitted model to be |
|
41 ## displayed; print = 2 requests information about convergence at each |
|
42 ## iteration. Other values request no information to be displayed. The |
|
43 ## input arguments `theta' and `beta' give initial estimates for theta |
|
44 ## and beta. |
|
45 ## |
|
46 ## `dev' holds minus twice the log-likelihood. |
|
47 ## |
|
48 ## `dl' and `d2l' are the vector of first and the matrix of second |
|
49 ## derivatives of the log-likelihood with respect to theta and beta. |
|
50 ## |
|
51 ## `p' holds estimates for the conditional distribution of Y given x. |
|
52 |
|
53 ## Original for MATLAB written by Gordon K Smyth <gks@maths.uq.oz.au>, |
|
54 ## U of Queensland, Australia, on Nov 19, 1990. Last revision Aug 3, |
|
55 ## 1992. |
|
56 |
|
57 ## Author: Gordon K Smyth <gks@maths.uq.oz.au>, |
|
58 ## Adapted-By: KH <Kurt.Hornik@ci.tuwien.ac.at> |
|
59 ## Description: Ordinal logistic regression |
|
60 |
|
61 ## Uses the auxiliary functions logistic_regression_derivatives and |
|
62 ## logistic_regression_likelihood. |
|
63 |
|
64 function [theta, beta, dev, dl, d2l, p] ... |
|
65 = logistic_regression (y, x, print, theta, beta) |
|
66 |
|
67 ## check input |
|
68 y = round (vec (y)); |
|
69 [my ny] = size (y); |
|
70 if (nargin < 2) |
|
71 x = zeros (my, 0); |
|
72 endif; |
|
73 [mx nx] = size (x); |
|
74 if (mx != my) |
|
75 error ("x and y must have the same number of observations"); |
|
76 endif |
|
77 |
|
78 ## initial calculations |
|
79 x = -x; |
|
80 tol = 1e-6; incr = 10; decr = 2; |
|
81 ymin = min (y); ymax = max (y); yrange = ymax - ymin; |
|
82 z = (y * ones (1, yrange)) == ((y * 0 + 1) * (ymin : (ymax - 1))); |
|
83 z1 = (y * ones (1, yrange)) == ((y * 0 + 1) * ((ymin + 1) : ymax)); |
|
84 z = z(:, any (z)); |
|
85 z1 = z1 (:, any(z1)); |
|
86 [mz nz] = size (z); |
|
87 |
|
88 ## starting values |
|
89 if (nargin < 3) |
|
90 print = 0; |
|
91 endif; |
|
92 if (nargin < 4) |
|
93 beta = zeros (nx, 1); |
|
94 endif; |
|
95 if (nargin < 5) |
|
96 g = cumsum (sum (z))' ./ my; |
|
97 theta = log (g ./ (1 - g)); |
|
98 endif; |
|
99 tb = [theta; beta]; |
|
100 |
|
101 ## likelihood and derivatives at starting values |
|
102 [g, g1, p, dev] = logistic_regression_likelihood (y, x, tb, z, z1); |
|
103 [dl, d2l] = logistic_regression_derivatives (x, z, z1, g, g1, p); |
|
104 epsilon = std (vec (d2l)) / 1000; |
|
105 |
|
106 ## maximize likelihood using Levenberg modified Newton's method |
|
107 iter = 0; |
|
108 while (abs (dl' * (d2l \ dl) / length (dl)) > tol) |
|
109 iter = iter + 1; |
|
110 tbold = tb; |
|
111 devold = dev; |
|
112 tb = tbold - d2l \ dl; |
|
113 [g, g1, p, dev] = logistic_regression_likelihood (y, x, tb, z, z1); |
|
114 if ((dev - devold) / (dl' * (tb - tbold)) < 0) |
|
115 epsilon = epsilon / decr; |
|
116 else |
|
117 while ((dev - devold) / (dl' * (tb - tbold)) > 0) |
|
118 epsilon = epsilon * incr; |
|
119 if (epsilon > 1e+15) |
|
120 error ("epsilon too large"); |
|
121 endif |
|
122 tb = tbold - (d2l - epsilon * eye (size (d2l))) \ dl; |
|
123 [g, g1, p, dev] = logistic_regression_likelihood (y, x, tb, z, z1); |
|
124 disp ("epsilon"); disp (epsilon); |
|
125 endwhile |
|
126 endif |
|
127 [dl, d2l] = logistic_regression_derivatives (x, z, z1, g, g1, p); |
|
128 if (print == 2) |
|
129 disp ("Iteration"); disp (iter); |
|
130 disp ("Deviance"); disp (dev); |
|
131 disp ("First derivative"); disp (dl'); |
|
132 disp ("Eigenvalues of second derivative"); disp (eig (d2l)'); |
|
133 endif |
|
134 endwhile |
|
135 |
|
136 ## tidy up output |
|
137 |
|
138 theta = tb (1 : nz, 1); |
|
139 beta = tb ((nz + 1) : (nz + nx), 1); |
|
140 |
|
141 if (print >= 1) |
|
142 printf ("\n"); |
|
143 printf ("Logistic Regression Results:\n"); |
|
144 printf ("\n"); |
|
145 printf ("Number of Iterations: %d\n", iter); |
|
146 printf ("Deviance: %f\n", dev); |
|
147 printf ("Parameter Estimates:\n"); |
|
148 printf (" Theta S.E.\n"); |
|
149 se = sqrt (diag (inv (-d2l))); |
|
150 for i = 1 : nz |
|
151 printf (" %8.4f %8.4f\n", tb (i), se (i)); |
|
152 endfor |
|
153 if (nx > 0) |
|
154 printf (" Beta S.E.\n"); |
|
155 for i = (nz + 1) : (nz + nx) |
|
156 printf (" %8.4f %8.4f\n", tb (i), se (i)); |
|
157 endfor |
|
158 endif |
|
159 endif |
|
160 |
|
161 if (nargout == 6) |
|
162 if (nx > 0) |
|
163 e = ((x * beta) * ones (1, nz)) + ((y * 0 + 1) * theta'); |
|
164 else |
|
165 e = (y * 0 + 1) * theta'; |
|
166 endif |
|
167 gamma = diff ([(y * 0) exp (e) ./ (1 + exp (e)) (y * 0 + 1)]')'; |
|
168 endif |
|
169 |
|
170 endfunction |