\documentclass[fleqn]{article}

\usepackage{haldefs}
\usepackage{notes}
\usepackage{url}

\begin{document}
\lecture{Machine Learning}{HW05: Neural Networks}{CS5350, Fall 2009}

% IF YOU ARE USING THIS .TEX FILE AS A TEMPLATE, PLEASE REPLACE
% "CS5350, Fall 2009" WITH YOUR NAME AND UID.

Hand in at: \url{http://www.cs.utah.edu/~hal/handin.pl?course=cs5350}.
Remember that only PDF submissions are accepted.  We encourage using
\LaTeX\ to produce your writeups.  See \verb+hw01.tex+ for an example
of how to do so.  You can make a \verb+.pdf+ out of the \verb+.tex+ by
running ``\verb+pdflatex hw05.tex+''.

\section{PRML Exercises}

\bee
\i *5.6
\i  5.7
\i  5.18 (6350 only)
\ene

\section{Additional Exercises}

\bee
\i Consider the soft-max function over a vector $\vec a = \langle a_1,
a_2, \dots, a_K \rangle$ defined by:
\begin{equation}
\textrm{softmax}(\vec a, k) = \frac {\exp[ a_k ]} {\sum_j \exp[ a_j ]}
\end{equation}
Take, for example, $\vec a = \langle -1, 2, 1 \rangle$.  Compute the
soft-max value for each component of $\vec a$ (please use a calculator
and only give a couple of decimal places for each number).  Now, scale
$\vec a$ by some ``temperature'' $T$; that is, $(1/T) \vec a = \langle
-1/T, 2/T, 1/T \rangle$.  Compute the soft-max value for each
component of $(1/T) \vec a$ for $T \in \{ 10, 2, 1, 0.5, 0.1, 0.01 \}$ to
verify that as the temperature decreases, the soft-max function
approaches just the max function.  Argue (in words, not math) why this
happens and what's so special about using something like an
exponential in the soft-max function.
\ene

\end{document}
