%Macros
\documentclass[11pt]{article}
\usepackage{amsmath,amssymb,amsthm}
\usepackage{graphicx}
\usepackage{url}
\usepackage{hyperref}
\usepackage{color}
\usepackage{algorithm,algorithmic}
\usepackage[T1]{fontenc}
\hypersetup{
colorlinks=true,
linkcolor=blue,
filecolor=magenta,
urlcolor=cyan,
}
\newcommand{\INPUT}{\item[{\bf Input:}]}
\newcommand{\OUTPUT}{\item[{\bf Output:}]}
\newcommand{\RR}{{\mathbb R}}
\newcommand{\myvec}[1]{\mathbf{#1}}
\newcommand{\ignore}[1]{}%
\DeclareMathOperator*{\E}{\mathbb{E}}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbb{P}}
%\DeclareMathOperator*{\myv}{\mathbf{#1}}
\newcommand{\mv}[1]{\mathbf{#1}}
\newcommand{\mynorm}[1]{\|{#1}\|}
\newcommand{\eps}{\varepsilon}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\handout}[5]{
\noindent
\begin{center}
\framebox{
\vbox{
%\hbox to 5.78in { {\bf AM 221: Advanced Optimization } \hfill #2 }
\hbox to 6.38in { {\bf AM 221: Advanced Optimization } \hfill #2 }
\vspace{4mm}
%\hbox to 5.78in { {\Large \hfill #5 \hfill} }
\hbox to 6.38in { {\Large \hfill #5 \hfill} }
\vspace{2mm}
\hbox to 6.38in { {\em #3 \hfill #4} }
%\hbox to 5.78in { {\em #3 \hfill #4} }
}
}
\end{center}
\vspace*{4mm}
}
\newcommand{\lecture}[3]{\handout{#1}{#2}{#3}{Lecture #1}}
\newcommand{\homework}[3]{\handout{#1}{#2}{#3}{Problem Set #1}}
\newcommand{\sect}[3]{\handout{#1}{#2}{#3}{Section #1}}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{observation}[theorem]{Observation}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{assumption}[theorem]{Assumption}
\newtheorem*{theorem*}{Theorem}
\newtheorem*{corollary*}{Corollary}
\newtheorem*{conjecture*}{Conjecture}
\newtheorem*{lemma*}{Lemma}
\newtheorem*{thm*}{Theorem}
\newtheorem*{prop*}{Proposition}
\newtheorem*{obs*}{Observation}
\newtheorem*{definition*}{Definition}
\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}
\newtheorem*{example}{Example}
\newtheorem*{rem*}{Remark}
\newtheorem*{rec*}{Recommendation}
% 1-inch margins, from fullpage.sty by H.Partl, Version 2, Dec. 15, 1988.
\topmargin 0pt
\advance \topmargin by -\headheight
\advance \topmargin by -\headsep
\textheight 8.9in
\oddsidemargin 0pt
\evensidemargin \oddsidemargin
\marginparwidth 0.5in
\textwidth 6.5in
\parindent 0in
\parskip 1.5ex
%Basics
\newcommand{\new}[1]{{\em #1\/}} % New term (set in italics).
\newcommand{\boxdef}[1]
{
\fbox{
\begin{minipage}{42em}
\begin{definition*}
{#1}
\end{definition*}
\end{minipage}
}
}
\newcommand{\boxthm}[1]
{
\fbox{
\begin{minipage}{42em}
\begin{theorem*}
{#1}
\end{theorem*}
\end{minipage}
}
}
%Probability
\newcommand{\prob}[2][]{\text{\bf P}\ifthenelse{\not\equal{}{#1}}{_{#1}}{}\!\left(#2\right)}
\newcommand{\expect}[2][]{\text{\bf E}\ifthenelse{\not\equal{}{#1}}{_{#1}}{}\!\left[#2\right]}
\newcommand{\var}[2][]{\text{\bf Var}\ifthenelse{\not\equal{}{#1}}{_{#1}}{}\!\left[#2\right]}
%Sets
\newcommand{\set}[1]{\{#1\}} % Set (as in \set{1,2,3})
\newcommand{\given}{\, : \,}
\newcommand{\setof}[2]{\{{#1} \given {#2}\}} % Set (as in \setof{x}{x > 0})
\newcommand{\compl}[1]{\overline{#1}} % Complement of ...
\newcommand{\zeros}{{\mathbf 0}}
\newcommand{\ones}{{\mathbf 1}}
\newcommand{\union}{{\bigcup}}
\newcommand{\inters}{{\bigcap}}
%Other Math
\newcommand{\floor}[1]{{\lfloor {#1} \rfloor}}
\newcommand{\bigfloor}[1]{{\left\lfloor {#1} \right\rfloor}}
\DeclareMathOperator{\argmax}{argmax}
\DeclareMathOperator{\argmin}{argmin}
\newcommand{\PRIMAL}{{\textsc{Primal}}}
\newcommand{\DUAL}{{\textsc{Dual}}}
%Numbers
\newcommand{\C}{\mathbb{C}} % Complex numbers.
\newcommand{\N}{\mathbb{N}} % Positive integers.
\newcommand{\Q}{\mathbb{Q}} % Rationals.
\newcommand{\R}{\mathbb{R}} % Reals.
\newcommand{\Z}{\mathbb{Z}} % Integers.
\newcommand{\M}{\mathcal{M}} % Matroids.
\newcommand{\I}{\mathcal{I}} % Independent Sets.
%Headings
\newcommand{\parta}{\textbf{(a)}}
\newcommand{\partb}{\textbf{(b)}}
\newcommand{\partc}{\textbf{(c)}}
\newcommand{\partd}{\textbf{(d)}}
\newcommand{\parte}{\textbf{(e)}}
\newcommand{\bt}{\boldsymbol{\theta}}
\newcommand{\cl}[1]{\text{\textbf{#1}}}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\bx}{\mathbf{x}}
\newcommand{\bv}{\mathbf{v}}
\newcommand{\bq}{\mathbf{q}}
\newcommand{\by}{\mathbf{y}}
\newcommand{\bb}{\mathbf{b}}
\newcommand{\bu}{\mathbf{u}}
\newcommand{\bd}{\mathbf{d}}
\newcommand{\ba}{\mathbf{a}}
\newcommand{\bc}{\mathbf{c}}
\newcommand{\bl}{\mathbf{l}}
\newcommand{\bp}{\mathbf{p}}
\newcommand{\bw}{\mathbf{w}}
\newcommand{\bz}{\mathbf{z}}
\begin{document}
\homework{9 --- {\color{red} Due Wed, Apr. 6th at 23:59}}{Spring 2016}{Prof.\ Yaron Singer}
%\renewcommand{\abstractname}{}
\paragraph{Instructions:}
All your solutions should be prepared in \LaTeX \ and the
PDF and .tex should be submitted to canvas. For each question, the best and
correct answers will be selected as sample solutions for the entire class to
enjoy. If you prefer that we do not use your solutions, please indicate this
clearly on the first page of your assignment.
The programming parts can be written in the programming language of your choice
and the code should be submitted alongside your solutions.
\paragraph{1. Vertex Cover is NP-complete}
In this problem we will show that the Vertex Cover problem is NP-complete by
reduction from the Clique problem.
%\begin{itemize}
\boxdef{
Consider an undirected graph $G = (V, E)$. A set $C\subseteq V$ is
called a \emph{vertex cover} of $G$ iff for all $\{u,v\}\in E$, $u\in C$ or
$v\in C$ (or both).
%Given a graph, a \emph{vertex cover} is a set of vertices in the graph $U$ where each edge in the graph has at least one endpoint in $U$.
}
In the decision version of the Vertex Cover problem, the input is an undirected
graph $G = (V, E)$ and an integer $k$ and the goal is to decide whether $G$ has
a vertex cover of size $k$.
\boxdef{Given an undirected graph $G = (V, E)$, a set $C\subseteq V$ is called a \emph{clique}
iff:
\begin{displaymath}
\forall \{u, v\}\in C \times C,\; (u, v)\in E
\end{displaymath}
}
In the Clique problem, the input is a graph $G = (V, E)$ and an integer $k$ and
the goal is to decide whether $G$ has a clique of size $k$. The Clique problem
is known to be NP-complete.
\begin{itemize}
\item[a.] Explain why Vertex Cover is in NP.
\item[b.] Given a graph $G = (V, E)$, we define its \emph{complement}
$\bar{G} \eqdef (V, 2^V\setminus E)$. In other words, $\bar{G}$ has the
same set of vertices, and $\{u,v\}$ is an edge in $\bar{G}$ if and only
if it is not an edge in $G$.
Let $C\subseteq V$ be a set of vertices. Show that $C$ is a vertex
cover of $G$ if and only if $V\setminus C$ is a clique of $\bar{G}$.
\item[c.] Show that Clique is polynomial-time reducible to Vertex Cover and
conclude that Vertex Cover is NP-complete.
\end{itemize}
\paragraph{2. A 1/2-approximation Algorithm for Vertex Cover.}
In Section 3, we saw the Vertex Cover problem and formulated it as an integer
program. We then studied its LP relaxation and the dual of this relaxation and
saw that it could be interpreted as the relaxation of the Maximum Matching
problem. In this problem, we will draw inspiration from the Vertex
Cover/Matching duality to construct a 2-approximation algorithm for Vertex
Cover.
The optimization version of Vertex Cover problem is to find a vertex cover $S$ of
minimum size. Let us denote by $\texttt{OPT}_G$ the optimal value of the Vertex
Cover problem for $G$.
\boxdef{A set $M\subset E$ is called a \emph{matching} of $G$ if and only if:
\begin{displaymath}
\forall u\in V,\; |\{e\in M\,|\, u\in e\}| \leq 1
\end{displaymath}
In other words, a vertex is incident to at most one edge of $M$. A matching $M$
is said to be \emph{maximal} if there is no matching $M'$ with $M\subset M'$.
}
\begin{itemize}
\item[a.] Give an algorithm to construct a maximal matching of $G$. What is
the running time of this algorithm?
\item[b.] Let $M$ be a matching of $G$, show that $|M|\leq \texttt{OPT}_G$.
\item[c.] Let $M$ be a matching and let us denote by $V(M)$ the set of
endpoints of edges in $M$:
\begin{displaymath}
V(M)\eqdef \{u\,|\,\exists e\in M,\; u\in e\}
\end{displaymath}
Show that when $M$ is a maximal matching of $G$, $V(M)$ is a vertex
cover of $G$.
\item[d.] Let $M$ be a maximal matching of $G$, show that $|V(M)|\leq
2\texttt{OPT}_G$.
\end{itemize}
\paragraph{3. DNA sequence alignment: algorithms.}
In this problem we study the problem of DNA sequence alignment. The input
to this problem is a pair of DNA sequences:
1: \texttt{TTGATCAATGG}\\
2: \texttt{ATCATACAAGGA}\\
and the goal is to understand whether or not they have the same function. For
this we find the best way to align the sequences to each other. If after
alignment, the sequences look ``similar enough'', we will conclude that they
have the same function.
More precisely, the sequences are aligned by introducing gaps. For the above
two sequences, a possible way to introduce gaps is as follows (gaps are
represented by hyphens):
1: \texttt{TTGAT-CAATGG-}\\
2: \texttt{ATCATACAA-GGA}\\
After introducing the gaps, the sequences are compared by counting the number
of mismatches, \emph{i.e.} the number of locations where the nucleotides differ.
For the above example, there are two mismatches: one at location 0, and one at
location 2 (using the usual array indexing convention in programming).
Let $g$ denote the number of gaps and $m$ denote the number of mismatches in
a given alignment, the overall cost $c$ of the alignment is then defined by:
\begin{displaymath}
c\eqdef 2\cdot g + m
\end{displaymath}
The above alignment has cost $2\cdot 3 + 2 = 8$. Another possible alignment is:
1: \texttt{TTGAT-CAATGG}\\
2: \texttt{ATCATACAAGGA}\\
which has cost $2\cdot 1 + 4 = 6$, which is minimal among all possible
alignments of these two sequences.
Let us denote by $s$ (resp. $t$) the first (resp. second) sequence. We define
$c(s, t)$ to be the cost of the alignment of minimal cost among all possible
alignments. Finally, we denote by $n$ (resp. $m$) the length of $s$ (resp.
$t$).
\begin{itemize}
\item[a.] For a sequence $s$, $s[i:j]$ will denote the substring of $s$
ranging from index $i$ inclusive to $j$ exclusive. Give a formula to
compute $c(s[0:i], t[0:j])$ as a function of $c(s[0:i], t[0:j-1])$,
$c(s[0:i-1], t[0:j])$ and $c(s[0:i-1], t[0:j-1])$.
\item[b.] Using part a. design and describe an algorithm to compute $c(s,
t)$ [\textbf{hint:} think dynamically!]. What is the running time
complexity of this algorithm?
\end{itemize}
\paragraph{4. DNA sequence alignment: implementation.}
You will now implement the algorithm you designed in Problem 3. The dataset is
available at \url{http://thibaut.horel.org/dna.txt}. Each line of the datafile
is the list of the first 10,000 base pairs of the genome of the well-known
\emph{Escherichia coli} bacteria. There are only two lines corresponding to two
different species of the bacteria.
\begin{itemize}
\item[a.] Write a script which reads the datafile and outputs the cost of
the optimal alignment of the two DNA sequences. Submit the code you
wrote.
\item[b.] Two DNA sequences are considered to have the same function is the
cost of the optimal alignment divided by the length of the sequence is
smaller than 5\%. Do the two DNA sequences in the datafile play the
same function?
\end{itemize}
\end{document}