changeset 1:8967b7da6ac2

Solve the exercise
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Thu, 03 Nov 2011 00:28:56 -0400
parents b87dfab8f3ac
children 443c6b29f715
files lrCostFunction.m oneVsAll.m predict.m predictOneVsAll.m
diffstat 4 files changed, 55 insertions(+), 174 deletions(-)
--- a/lrCostFunction.m
+++ b/lrCostFunction.m
@@ -1,50 +1,19 @@
 function [J, grad] = lrCostFunction(theta, X, y, lambda)
-%LRCOSTFUNCTION Compute cost and gradient for logistic regression with 
-%regularization
-%   J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using
-%   theta as the parameter for regularized logistic regression and the
-%   gradient of the cost w.r.t. to the parameters. 
+  ##LRCOSTFUNCTION Compute cost and gradient for logistic regression with 
+  ##regularization
+  ##   J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using
+  ##   theta as the parameter for regularized logistic regression and the
+  ##   gradient of the cost w.r.t. the parameters.
 
-% Initialize some useful values
-m = length(y); % number of training examples
-
-% You need to return the following variables correctly 
-J = 0;
-grad = zeros(size(theta));
+  ## Initialize some useful values
+  m = length(y); ## number of training examples
 
-% ====================== YOUR CODE HERE ======================
-% Instructions: Compute the cost of a particular choice of theta.
-%               You should set J to the cost.
-%               Compute the partial derivatives and set grad to the partial
-%               derivatives of the cost w.r.t. each parameter in theta
-%
-% Hint: The computation of the cost function and gradients can be
-%       efficiently vectorized. For example, consider the computation
-%
-%           sigmoid(X * theta)
-%
-%       Each row of the resulting matrix will contain the value of the
-%       prediction for that example. You can make use of this to vectorize
-%       the cost function and gradient computations. 
-%
-% Hint: When computing the gradient of the regularized cost function, 
-%       there're many possible vectorized solutions, but one solution
-%       looks like:
-%           grad = (unregularized gradient for logistic regression)
-%           temp = theta; 
-%           temp(1) = 0;   % because we don't add anything for j = 0  
-%           grad = grad + YOUR_CODE_HERE (using the temp variable)
-%
+  ## h_theta(x)
+  ht = sigmoid (X*theta); 
 
-
-
-
-
-
+  J = -sum (y.*log (ht) + (1 - y).*log (1 - ht))/m \
+      + lambda*sum (theta(2:end).^2)/(2*m);
 
-
-
+  grad = (X'*(ht - y) + [0; lambda*theta(2:end,:)])/m;
 
-% =============================================================
-
-end
+endfunction
\ No newline at end of file
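
For reference, the two expressions above are a vectorized form of the standard regularized logistic regression cost and gradient, with h_theta(x) = sigmoid(theta'*x):

    J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \Big[ y^{(i)} \log h_\theta(x^{(i)})
                + \big(1 - y^{(i)}\big) \log\big(1 - h_\theta(x^{(i)})\big) \Big]
                + \frac{\lambda}{2m} \sum_{j=1}^{n} \theta_j^2

    \frac{\partial J}{\partial \theta_j}
        = \frac{1}{m} \sum_{i=1}^{m} \big(h_\theta(x^{(i)}) - y^{(i)}\big)\, x_j^{(i)}
          + \frac{\lambda}{m} \theta_j \qquad (j \geq 1)

The intercept theta_0 (theta(1) in Octave's 1-based indexing) is not regularized, which is why the code sums theta(2:end).^2 in J and splices a 0 in front of lambda*theta(2:end) in the gradient.
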
--- a/oneVsAll.m
+++ b/oneVsAll.m
@@ -1,66 +1,20 @@
 function [all_theta] = oneVsAll(X, y, num_labels, lambda)
-%ONEVSALL trains multiple logistic regression classifiers and returns all
-%the classifiers in a matrix all_theta, where the i-th row of all_theta 
-%corresponds to the classifier for label i
-%   [all_theta] = ONEVSALL(X, y, num_labels, lambda) trains num_labels
-%   logisitc regression classifiers and returns each of these classifiers
-%   in a matrix all_theta, where the i-th row of all_theta corresponds 
-%   to the classifier for label i
+  ##  [all_theta] = ONEVSALL(X, y, num_labels, lambda)
+  ##
+  ##   trains num_labels logistic regression classifiers and returns
+  ##   each of these classifiers in a matrix all_theta, where the i-th
+  ##   row of all_theta corresponds to the classifier for label i
 
-% Some useful variables
-m = size(X, 1);
-n = size(X, 2);
-
-% You need to return the following variables correctly 
-all_theta = zeros(num_labels, n + 1);
-
-% Add ones to the X data matrix
-X = [ones(m, 1) X];
+  m = rows (X);
+  n = columns (X);
 
-% ====================== YOUR CODE HERE ======================
-% Instructions: You should complete the following code to train num_labels
-%               logistic regression classifiers with regularization
-%               parameter lambda. 
-%
-% Hint: theta(:) will return a column vector.
-%
-% Hint: You can use y == c to obtain a vector of 1's and 0's that tell use 
-%       whether the ground truth is true/false for this class.
-%
-% Note: For this assignment, we recommend using fmincg to optimize the cost
-%       function. It is okay to use a for-loop (for c = 1:num_labels) to
-%       loop over the different classes.
-%
-%       fmincg works similarly to fminunc, but is more efficient when we
-%       are dealing with large number of parameters.
-%
-% Example Code for fmincg:
-%
-%     % Set Initial theta
-%     initial_theta = zeros(n + 1, 1);
-%     
-%     % Set options for fminunc
-%     options = optimset('GradObj', 'on', 'MaxIter', 50);
-% 
-%     % Run fmincg to obtain the optimal theta
-%     % This function will return theta and the cost 
-%     [theta] = ...
-%         fmincg (@(t)(lrCostFunction(t, X, (y == c), lambda)), ...
-%                 initial_theta, options);
-%
+  all_theta = zeros(num_labels, n + 1);
+  X = [ones(m, 1) X];
+
+  for c = 1:num_labels
 
-
-
-
-
-
-
-
+    all_theta(c, :) = fmincg (@(t) lrCostFunction (t, X, y == c, lambda),
+                              zeros (n+1, 1));
+  endfor
 
-
-
-
-% =========================================================================
-
-
-end
+endfunction
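
The y == c comparison is what reduces the multi-class problem to num_labels independent binary problems: for each class c it yields a 0/1 target vector that lrCostFunction can consume directly. A toy illustration in plain Octave:

    y = [1; 3; 1; 2];   ## multi-class labels for 4 examples
    (y == 1)'           ## ans = 1  0  1  0  (binary targets for class 1)
    (y == 3)'           ## ans = 0  1  0  0  (binary targets for class 3)

fmincg itself is the conjugate-gradient minimizer shipped with the exercise materials; called without an options struct, as above, it falls back to its internal default iteration limit.
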
--- a/predict.m
+++ b/predict.m
@@ -1,35 +1,11 @@
 function p = predict(Theta1, Theta2, X)
-%PREDICT Predict the label of an input given a trained neural network
-%   p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
-%   trained weights of a neural network (Theta1, Theta2)
-
-% Useful values
-m = size(X, 1);
-num_labels = size(Theta2, 1);
-
-% You need to return the following variables correctly 
-p = zeros(size(X, 1), 1);
+  ##PREDICT Predict the label of an input given a trained neural network
+  ##   p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
+  ##   trained weights of a neural network (Theta1, Theta2)
 
-% ====================== YOUR CODE HERE ======================
-% Instructions: Complete the following code to make predictions using
-%               your learned neural network. You should set p to a 
-%               vector containing labels between 1 to num_labels.
-%
-% Hint: The max function might come in useful. In particular, the max
-%       function can also return the index of the max element, for more
-%       information see 'help max'. If your examples are in rows, then, you
-%       can use max(A, [], 2) to obtain the max for each row.
-%
-
+  m = rows (X);
+  X = [ones(m, 1), X];
 
-
-
-
-
-
+  [~, p] = max ([ones(m, 1), sigmoid(X*Theta1')]*Theta2', [], 2);
 
-
-% =========================================================================
-
-
-end
+endfunction
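
Unrolled, that one-liner is the usual two-layer forward pass; the output-layer sigmoid is omitted because it is monotonic and cannot change which column attains the row maximum. A step-by-step sketch of an equivalent function body, assuming the exercise's sigmoid.m is on the path:

    m  = rows (X);
    a1 = [ones(m, 1), X];            ## input layer plus bias unit
    a2 = sigmoid (a1*Theta1');       ## hidden-layer activations
    z3 = [ones(m, 1), a2]*Theta2';   ## output scores, one column per label
    [~, p] = max (z3, [], 2);        ## predicted label = index of row max
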
--- a/predictOneVsAll.m
+++ b/predictOneVsAll.m
@@ -1,42 +1,24 @@
 function p = predictOneVsAll(all_theta, X)
-%PREDICT Predict the label for a trained one-vs-all classifier. The labels 
-%are in the range 1..K, where K = size(all_theta, 1). 
-%  p = PREDICTONEVSALL(all_theta, X) will return a vector of predictions
-%  for each example in the matrix X. Note that X contains the examples in
-%  rows. all_theta is a matrix where the i-th row is a trained logistic
-%  regression theta vector for the i-th class. You should set p to a vector
-%  of values from 1..K (e.g., p = [1; 3; 1; 2] predicts classes 1, 3, 1, 2
-%  for 4 examples) 
-
-m = size(X, 1);
-num_labels = size(all_theta, 1);
-
-% You need to return the following variables correctly 
-p = zeros(size(X, 1), 1);
-
-% Add ones to the X data matrix
-X = [ones(m, 1) X];
+  ##PREDICTONEVSALL Predict the label for a trained one-vs-all classifier. The labels
+  ##are in the range 1..K, where K = size(all_theta, 1). 
+  ##  p = PREDICTONEVSALL(all_theta, X) will return a vector of predictions
+  ##  for each example in the matrix X. Note that X contains the examples in
+  ##  rows. all_theta is a matrix where the i-th row is a trained logistic
+  ##  regression theta vector for the i-th class. You should set p to a vector
+  ##  of values from 1..K (e.g., p = [1; 3; 1; 2] predicts classes 1, 3, 1, 2
+  ##  for 4 examples) 
 
-% ====================== YOUR CODE HERE ======================
-% Instructions: Complete the following code to make predictions using
-%               your learned logistic regression parameters (one-vs-all).
-%               You should set p to a vector of predictions (from 1 to
-%               num_labels).
-%
-% Hint: This code can be done all vectorized using the max function.
-%       In particular, the max function can also return the index of the 
-%       max element, for more information see 'help max'. If your examples 
-%       are in rows, then, you can use max(A, [], 2) to obtain the max 
-%       for each row.
-%       
+  ## Add ones to the X data matrix
+  X = [ones(rows(X), 1) X];
+ 
+  ## The index of the per-row max is the predicted label (note we don't
+  ## need the sigmoid, which gives the actual probabilities, since it is
+  ## monotonic and doesn't change where the max occurs)
+  [junk, p] = max (X*all_theta', [], 2);
 
-
-
-
-
+  ## Octave 3.4 and later can use the following syntax to omit output
+  ## arguments
+  ##
+  ## [~, p] = max (X*all_theta', [], 2);
 
-
-% =========================================================================
-
-
-end
+endfunction
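
A hypothetical end-to-end check of all four solutions, assuming the exercise's standard files are on the path (ex3data1.mat providing X and y, ex3weights.mat providing Theta1 and Theta2, plus fmincg.m and sigmoid.m); the printed accuracies are whatever the run yields:

    load ('ex3data1.mat');                     ## X and y: handwritten digits
    lambda = 0.1;
    all_theta = oneVsAll (X, y, 10, lambda);   ## ten one-vs-all classifiers
    p = predictOneVsAll (all_theta, X);
    printf ("one-vs-all training accuracy: %.2f%%\n", 100*mean (p == y));

    load ('ex3weights.mat');                   ## Theta1 and Theta2
    p = predict (Theta1, Theta2, X);
    printf ("neural network training accuracy: %.2f%%\n", 100*mean (p == y));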