\documentclass[a4paper]{article}
\usepackage{hyperref, graphicx, color, alltt, amsmath}
\usepackage{Sweave}
\usepackage[round]{natbib}

\begin{document}
\SweaveOpts{concordance=TRUE}

%\VignetteIndexEntry{Weighted Support Vector Machine Formulation}
%\VignetteDepends{WeightSVM}
%\VignetteKeywords{classification, regression, machine learning, support vector machines}
%\VignettePackage{WeightSVM}

\SweaveOpts{engine=R,eps=FALSE}
\setkeys{Gin}{width=0.8\textwidth}

\title{Weighted Support Vector Machine Formulation}
\author{Tianchen Xu\\ \url{tx2155@columbia.edu}}

\maketitle
\sloppy

The original formulation of the unweighted SVM with a linear kernel, shown here in its $\varepsilon$-regression form, is as follows \citep{valdimirnature}:
\begin{eqnarray}
\min_{\omega,\omega_0,\xi,\xi^*}&&\frac{1}{2}\|\omega\|^2+C \sum_{i=1}^n (\xi_i+\xi_i^*) \nonumber\\
\mbox{s.t.} &&y_i - \langle\omega,x_i\rangle-\omega_0\le \varepsilon + \xi_i,\nonumber\\
&&\langle\omega,x_i\rangle+\omega_0-y_i \le \varepsilon + \xi_i^*,\nonumber\\
&&\xi_i, \xi_i^* \ge 0.\nonumber
\end{eqnarray}
The constant $C>0$ determines the trade-off between the flatness of the regression function $f(x)=\langle\omega,x\rangle+\omega_0$ and the amount up to which deviations larger than $\varepsilon$ are tolerated. This corresponds to the so-called $\varepsilon$-insensitive loss function $|\xi|_\varepsilon$, defined by
\begin{equation}
|\xi|_\varepsilon =
\begin{cases}
0, & \text{if }|\xi|\le \varepsilon,\\
|\xi|-\varepsilon, & \text{otherwise}.
\end{cases}\nonumber
\end{equation}

The corresponding weighted SVM, with an individual weight $W_i$ for each observation, is:
\begin{eqnarray}
\min_{\omega,\omega_0,\xi,\xi^*}&&\frac{1}{2}\|\omega\|^2+C \sum_{i=1}^n \textcolor{red}{W_i}(\xi_i+\xi_i^*) \nonumber\\
\mbox{s.t.} &&y_i - \langle\omega,x_i\rangle-\omega_0\le \varepsilon + \xi_i,\nonumber\\
&&\langle\omega,x_i\rangle+\omega_0-y_i \le \varepsilon + \xi_i^*,\nonumber\\
&&\xi_i, \xi_i^* \ge 0.\nonumber
\end{eqnarray}
Weighted SVMs with other kernels have analogous formulations.

\medskip
\noindent Available kernels:

\medskip
\noindent
\begin{minipage}{\textwidth}
\centering
\begin{tabular}{|l|l|l|}
\hline
kernel & formula & parameters \\
\hline \hline
linear & $\mathbf{u}^\top \mathbf{v}$ & (none) \\
polynomial & $(\gamma \mathbf{u}^\top \mathbf{v}+c_0)^d$ & $\gamma, d, c_0$ \\
radial basis fct. & $\exp(-\gamma\|\mathbf{u}-\mathbf{v}\|^2)$ & $\gamma$ \\
sigmoid & $\tanh(\gamma \mathbf{u}^\top \mathbf{v}+c_0)$ & $\gamma, c_0$ \\
\hline
\end{tabular}
\end{minipage}
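\medskip
The weighted formulation above is what the \texttt{WeightSVM} package fits. The following chunk is a minimal sketch, assuming the package's \texttt{wsvm()} function mirrors the usual \texttt{svm()} interface with an additional \texttt{weight} vector supplying one $W_i$ per observation, and that \texttt{cost} and \texttt{epsilon} correspond to $C$ and $\varepsilon$ in the formulation. The chunk is not evaluated, since these argument names are assumptions rather than verified here.
\begin{Schunk}
\begin{Sinput}
<<wsvm-example, eval=FALSE>>=
library(WeightSVM)

## simulated data for an epsilon-regression example
set.seed(1)
x <- matrix(rnorm(200), ncol = 2)
y <- x[, 1] - 2 * x[, 2] + rnorm(100, sd = 0.1)

## individual weights W_i: down-weight the first half of the sample
w <- c(rep(0.5, 50), rep(1, 50))

## cost plays the role of C, epsilon the insensitivity parameter
fit <- wsvm(x, y, weight = w, type = "eps-regression",
            kernel = "linear", cost = 1, epsilon = 0.1)
head(predict(fit, x))
@
\end{Sinput}
\end{Schunk}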
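\medskip
Each kernel in the table would be selected via a \texttt{kernel} argument; the sketch below assumes the table's parameters $\gamma$, $d$ and $c_0$ map to arguments named \texttt{gamma}, \texttt{degree} and \texttt{coef0} (the naming convention of the underlying \texttt{libsvm} interface; an assumption, not verified here).
\begin{Schunk}
\begin{Sinput}
<<kernel-choices, eval=FALSE>>=
## gamma, degree and coef0 are assumed to match the
## table's parameters for each kernel
fit_rbf  <- wsvm(x, y, weight = w, kernel = "radial",
                 gamma = 0.5)
fit_poly <- wsvm(x, y, weight = w, kernel = "polynomial",
                 gamma = 0.5, degree = 3, coef0 = 1)
fit_sig  <- wsvm(x, y, weight = w, kernel = "sigmoid",
                 gamma = 0.5, coef0 = 0)
@
\end{Sinput}
\end{Schunk}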
\begin{thebibliography}{1}
\bibitem[Vapnik(1995)]{valdimirnature}
Vladimir~N. Vapnik.
\newblock \emph{The Nature of Statistical Learning Theory}.
\newblock Springer, 1995.
\end{thebibliography}

\end{document}