\documentclass[fleqn]{article}

\usepackage{haldefs}
\usepackage{notes}
\usepackage{url}
\usepackage{graphicx}

\begin{document}
\lecture{CS5350: Machine Learning}{HW4: Loss and Optimization}{Due 30 Sep 2008}

\section{Written Exercises}

Answer the following questions in 25--100 words each:

\bee

\item Suppose we are trying to find the minimum of the function $f(x)
  = 3x^2 - 2x + 1$, for univariate (scalar) $x$.  First, find the
  minimum of this function using calculus.  Second, verify that this
  function is convex (and therefore the point you found in the first
  step is a \emph{global} minimum).  Finally, perform (by hand---show
  your work) three steps of gradient descent with $\eta = 0.1$ and the
  initial point $x_0 = 1$.  How close does it get to the true
  solution?


\item 0/1 loss is hard to optimize using gradient descent because it
  is poorly behaved (discontinuous, non-differentiable, etc.).  A
  common ``smooth'' version of 0/1 loss is the \emph{sigmoid} loss,
  which makes use of our favorite function, the sigmoid: $\si(z) =
  1/(1+\exp[-z])$.  In particular, we define our loss function
  $\ell(y, f(x)) = \si(-y f(x))$.  First, verify that this is a
  reasonable loss function: when $f(x)$ is correct, the loss is low,
  and when $f(x)$ is incorrect, the loss is high.  You may assume that
  $f(x)$ produces \emph{real values}, where positive values mean class
  $+1$ and negative values mean class $-1$.

\item Consider optimizing a linear function under sigmoid loss, so
  that we have $f(\mat X, \vec y,\vec w, b) = \sum_n \si(-y_n (\vec w\T
  \vec x_n + b))$.  Compute the gradient of this function with respect
  to $\vec w$ and with respect to $b$ so that we might construct a
  gradient descent algorithm.

\item (6350 only) Determine whether the loss function from the previous
  two questions is convex in $\vec w$ and $b$, and justify your answer.
\ene


\end{document}
