%\VignetteIndexEntry{distr - manual} %\VignetteDepends{distr} %\VignetteKeyword{probability distribution} %\VignetteKeyword{simulation} %\VignetteKeyword{estimation} %\VignettePackage{distr} %\VignetteEngine{knitr::knitr} % \documentclass[11pt]{article} % \usepackage{svn-multi} % Version control information: \svnidlong {$HeadURL: svn+ssh://ruckdeschel@svn.r-forge.r-project.org/svnroot/distr/pkg/distrDoc/vignettes/distr.Rnw $} {$LastChangedDate: 2024-09-28 17:07:56 +0200 (Sa, 28 Sep 2024) $} {$LastChangedRevision: 1475 $} {$LastChangedBy: ruckdeschel $} %\svnid{$Id: distr.Rnw 1475 2024-09-28 15:07:56Z ruckdeschel $} % Don't forget to set the svn property 'svn:keywords' to % 'HeadURL LastChangedDate LastChangedRevision LastChangedBy' or % 'Id' or both depending if you use \svnidlong and/or \svnid % \newcommand{\svnfooter}{Last Changed Rev: \svnkw{LastChangedRevision}} \svnRegisterAuthor{ruckdeschel}{Peter Ruckdeschel} \svnRegisterAuthor{stamats}{Matthias Kohl} \svnRegisterAuthor{florian}{Florian Camphausen} \svnRegisterAuthor{stabla}{Thomas Stabla} \svnRegisterAuthor{anhuel}{Anja H{\"u}ller} \svnRegisterAuthor{ifrin}{Eleonara Feist} \svnRegisterAuthor{jdospina}{Juan David Ospina} \svnRegisterAuthor{kowzar}{Kouros Owzar} % %borrowed from doc/manual/refman.top % %% Set PDF 1.5 and compression, including object compression %% Needed for MiKTeX -- most other distributions default to this \ifx\pdfoutput\undefined \else \ifx\pdfoutput\relax \else \ifnum\pdfoutput>0 % PDF output \pdfminorversion=5 \pdfcompresslevel=9 \pdfobjcompresslevel=2 \fi \fi \fi % % \usepackage{geometry} \usepackage[T1]{fontenc} \usepackage{ifpdf} \usepackage{color} \definecolor{darkblue}{rgb}{0.0,0.0,0.75} \definecolor{Rcolor}{rgb}{0,0.5,0.5} \definecolor{Rout}{rgb}{0.461,0.039,0.102} \definecolor{Rcomment}{rgb}{0.101,0.043,0.432} \usepackage{amssymb} \usepackage{url} \usepackage[% baseurl={https://www.bioconductor.org},% pdftitle={S4 Classes for Distributions---a manual for packages distr, distrSim, distrTEst, distrEx, distrMod, and distrTeach},% pdfauthor={Peter Ruckdeschel, Matthias Kohl, Thomas Stabla, Florian Camphausen},% pdfsubject={distr},% pdfkeywords={probability distribution,simulation,estimation},% pagebackref,bookmarks,colorlinks,linkcolor=darkblue,citecolor=darkblue,% pagecolor=darkblue,raiselinks,plainpages,pdftex]{hyperref} % % ------------------------------------------------------------------------------- \RequirePackage{fancyvrb} \RequirePackage{listings} %\usepackage{Sweave} no longer needed % ------------------------------------------------------------------------------- \definecolor{distrCol}{rgb}{0.0,0.4,0.4} %% %% ------------------------------------------------------------------------------ %% before version 2.7, we used package SweaveListingUtils for syntax highlighting %% from version 2.7 on, we switch to knitR %% ------------------------------------------------------------------------------ %% %%#(ll)SweaveListingsPreparations, results="asis", echo=FALSE(ggeq) %%#require(SweaveListingUtils) %%#SweaveListingPreparations() %%#setToBeDefinedPkgs(pkgs = c("distr","distrEx","distrTEst","distrSim", %%# "distrDoc","distrTeach","distrMod","RandVar"), %%# keywordstyles = "\\bf\\color{distrCol}") %%#(at) %% %% ------------------------------------------------------------------------------ % % ------------------------------------------------------------------------------- \newcommand{\pkgExversion}{{\tt 2.7}} \newcommand{\Reals}{\mathbb{R}} \newcommand{\R}{\mathbb{R}} \newcommand{\N}{\mathbb{N}} % 
------------------------------------------------------------------------------- % \markboth{\sl Packages ``{\tt distr}'', ``{\tt distrSim}'', ``{\tt distrTEst}'', ``{\tt distrEx}'', ``{\tt distrEllipse}'', ``{\tt distrTeach}'', ``{\tt distrMod}''}% {\sl Packages ``{\tt distr}'', ``{\tt distrSim}'', ``{\tt distrTEst}'', ``{\tt distrEx}'', ``{\tt distrEllipse}'', ``{\tt distrTeach}'', ``{\tt distrMod}''} % % ------------------------------------------------------------------------------- % % ------------------------------------------------------------------------------- \begin{document} % ------------------------------------------------------------------------------- <>= library(knitr) opts_chunk$set(tidy=FALSE) @ % ------------------------------------------------------------------------------- \lstdefinelanguage{Rd}[common]{TeX}% {moretexcs={acronym,alias,arguments,author,bold,cite,% code,command,concept,cr,deqn,describe,% description,details,dfn,doctype,dots,% dontrun,dontshow,donttest,dQuote,% email,emph,enc,encoding,enumerate,env,eqn,% examples,file,format,if,ifelse,item,itemize,kbd,keyword,% ldots,link,linkS4class,method,name,note,% option,out,pkg,preformatted,R,Rdopts,Rdversion,% references,S3method,S4method,Sexpr,samp,section,% seealso,source,sp,special,sQuote,strong,% subsection,synopsis,tab,tabular,testonly,% title,url,usage,value,var,verb}, sensitive=true,% morecomment=[l]\%% 2008/9 Peter Ruckdeschel }[keywords,comments]%% \lstdefinestyle{Rdstyle}{fancyvrb=TRUE,language=Rd, keywordstyle={\bf},% basicstyle={\color{black}\footnotesize},% commentstyle={\ttfamily\itshape},% alsolanguage=R}% \global\def\Rdlstset{\lstset{style=Rdstyle}}% % ------------------------------------------------------------------------------- \lstset{language=R,basicstyle={\color{Rcolor}\footnotesize},keywordstyle={\bf}} \let\code\lstinline \def\Code#1{{\tt\color{Rcolor} #1}} \def\file#1{{\tt #1}} \def\pkg#1{{\tt"#1"}} \newcommand{\pkgversion}{{\tt 2.9}} % ------------------------------------------------------------------------------- \title{{\tt S4} Classes for Distributions---a manual for packages \pkg{distr}, \pkg{distrEx}, \pkg{distrEllipse}, \pkg{distrMod}, \pkg{distrSim}, \pkg{distrTEst}, \pkg{distrTeach}, version \pkgversion} %,version \pkgExversion} \author{\small Peter Ruckdeschel\thanks{Carl von Ossietzky Universit\"at Oldenburg} \\[-.5ex] \small Matthias Kohl\thanks{Hochschule Furtwangen} \\[-.5ex] \small Thomas Stabla\thanks{Graf-M\"unster-Gymnasium, Bayreuth} \\[-.5ex] \small Florian Camphausen\thanks{European Central Bank, Frankfurt} \smallskip\\ \small Institute for Mathematics\\[-.5ex] \small School of Mathematics and Science\\[-.5ex] \small Oldenburg University\\[-.5ex] \small PO box 25 03\\[-.5ex] \small 26111 Oldenburg (Oldb.)\\[-.5ex] \small Germany\\ \small e-Mail: {\small \tt peter.ruckdeschel@uni-oldenburg.de}\medskip\\ \parbox[t]{5cm}{ \footnotesize\sffamily Version control information: \begin{tabbing} \footnotesize\sffamily Last changes revision: \= \kill Head URL: \> \parbox[t]{6cm}{\url{\svnkw{HeadURL}}}\\[1.2ex] Last changed date: \> \svndate\\ Last changes revision: \> \svnrev\\ Version: \> \svnFullRevision*{\svnrev}\\ Last changed by: \> \svnFullAuthor*{\svnauthor}\\ \end{tabbing} } } \maketitle % ------------------------------------------------------------------------------- \begin{abstract} % ------------------------------------------------------------------------------- \pkg{distr} is a package for {\sf R} from version {\tt 1.8.1} onwards that is distributed under {\tt LGPL-3}. 
Its own current version is \pkgversion. % The aim of this package is to provide a conceptual treatment of random variables (r.v.'s) by means of {\tt S4}--classes. A mother class \code{Distribution} is introduced with slots for a parameter and for functions {\tt r}, {\tt d}, {\tt p}, and {\tt q} for simulation, respectively for evaluation of density / c.d.f.\ and quantile function of the corresponding distribution. All distributions of the \pkg{stats} package are implemented as subclasses of either \code{AbscontDistribution} or \code{DiscreteDistribution}, which themselves are again subclasses of \code{UnivariateDistribution}. %\\ % By means of these classes, we may automatically generate new objects of these classes for the laws of r.v.'s under standard mathematical univariate transformations and under standard bivariate arithmetical operations acting on independent r.v.'s. % %From version 1.6 on, \pkg{distr} has been split up into the smaller packages %\pkg{distr} (only distribution-classes and -methods), \pkg{distrSim} %(standardized treatment of simulations, also under contaminations) %and \pkg{distrTEst} \newline(classes and methods for evaluations of statistical %procedures on such simulations). Package \pkg{distr} in this setting works as basic package for further extensions. These start with package \pkg{distrEx}, covering statistical functionals like expectation, variance and the median evaluated at distributions, as well as distances between distributions and basic support for multivariate and conditional distributions. Next, from version 2.0 on, comes package \pkg{distrMod} which uses these concepts to provide an object orientated competitor to \code{fitdistr} from package \pkg{MASS} in covering estimation in statistical models. Further on there are packages \pkg{distrSim} for the standardized treatment of simulations, also under contaminations and package \pkg{distrTEst} with classes and methods for evaluations of statistical procedures on such simulations. Finally, from version 2.0 on, there is package \pkg{distrTeach} to embody illustrations for basic stats courses using our distribution classes. From version 2.4 on, we have moved support for extreme value distributions, as well as for certain scale-shape distributions to the new package \pkg{RobExtremes}. This concerns the Gumbel, Weibull, Pareto distributions. %\noindent The latter two of them require package \pkg{setRNG} by %\href{mailto:pgilbert@bank-banque-canada.ca}{Paul Gilbert} %to be installed from \href{https://cran.r-project.org/mirrors.html}{\tt CRAN}. % \\ %\noindent Additionally, mainly contributed by \cite{MK:05}, in \pkg{distrEx} we %extend the functionality of \pkg{distr}, providing functionals like expectation %or variance and distances for distributions. Also, this package contains some %first steps to multivariate distributions, providing classes for discrete %multivariate distributions and for factorized, conditional %distributions. % ------------------------------------------------------------------------------- \end{abstract} % ------------------------------------------------------------------------------- \tableofcontents \noindent {\small Parts of this document appeared in an earlier and much shorter form in {\em R-News\/}, {\bf 6}(2) as {\sf ``S4 Classes for Distributions''}, c.f.\ \cite{R:K:S:C:04}, which in its published form refers to package versions 1.6, resp.\ 0.4-2. 
This present document takes into account the subsequent revisions and versions.}\medskip
% -------------------------------------------------------------------------------
\addtocounter{section}{-1}
\section{Motivation}
% -------------------------------------------------------------------------------
{\sf R} up to now contains powerful techniques for virtually any useful
distribution, using the suggestive naming convention {\tt [prefix]<name>} for functions,
where {\tt [prefix]} stands for {\tt r}, {\tt d}, {\tt p}, or {\tt q}, and
{\tt <name>} is the name of the distribution.\\
There are limitations of this concept, however: You can only use distributions
which are already implemented in some library or for which you yourself have
provided an implementation. In many natural settings you want to formulate
algorithms once for all distributions, so you should be able to treat the actual
distribution {\tt <name>} as a sort of variable.\\
You may of course paste together the prefix and the value of {\tt <name>} as a string
and then use \code{eval(parse(....))}. This is neither very elegant nor flexible,
however.\\
%
Instead, we would rather like to implement the algorithm by passing an object of some
distribution class as argument to the function. Even better though, we would use
a generic function and let the {\tt S4}-dispatching mechanism decide what to do at
run-time. In particular, we would like to automatically generate the corresponding
functions {\tt r}, {\tt d}, {\tt p}, and {\tt q} for the law of expressions like
\code{X+3Y} for objects \code{X} and \code{Y} of class \code{Distribution}, or, more
generally, of a transformation of $X$, $Y$ under a function
$f\colon \Reals^2 \to \Reals$ which is already realized as a function in {\sf R}.\\
This is possible with package \pkg{distr}. As an example, try
<<>>=
## preparation: set option withSweave to TRUE
require(distrTEst)
require(distrEx)
require(distrTeach)
require(distrMod)
distroptions(withSweave = TRUE)
options("newDevice" = TRUE)
@
<<>>=
require(distr)
N <- Norm(mean = 2, sd = 1.3)
P <- Pois(lambda = 1.2)
Z <- 2*N + 3 + P
Z
plot(Z)
p(Z)(0.4)
q(Z)(0.3)
## in RStudio or Jupyter IRKernel, use q.l(.)(.) instead of q(.)(.)
Zs <- r(Z)(50)
Zs
@
\par
\begin{small}
In the environments of \texttt{RStudio}, see \url{https://posit.co}, and
\texttt{Jupyter IRKernel}, see \url{https://github.com/IRkernel/IRkernel},
calls to \code{q} are caught away from standard {\sf R} evaluation and are treated
in a non-standard way. This non-standard evaluation in particular throws errors at
calls to our accessor method \code{q} for slot \code{q} of the respective
distribution object. For \texttt{RStudio}, this holds for calls from the console
(i.e., in the \code{.GlobalEnv} environment) only, while calls to our accessor
\code{q} in a different environment are not affected. This is not true in
\texttt{IRKernel}, where all calls to our accessor \code{q} are affected. To amend
this, from version 2.6 on, we provide the function \code{q.l} (for left-continuous
quantile function) as an alias to our accessor \code{q}, so that all our package
functionality also becomes available in \texttt{RStudio} and \texttt{IRKernel}.
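For instance, reusing the object \code{Z} generated above, the following call should
return the same value as \code{q(Z)(0.3)}, but also works in the environments just
mentioned (a small illustration only; the alias simply dispatches to the same slot):
<<>>=
q.l(Z)(0.3)   ## same value as q(Z)(0.3), but safe in RStudio / Jupyter IRKernel
@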
The same holds, albeit with fewer consequences in \texttt{RStudio} and
\texttt{IRKernel}, for the accessor \code{p}: Here we have an alias \code{p.r}
(for the right-continuous cdf) for it.\\
\end{small}
\par
\begin{small}
\noindent{\bf Comment:}\\
Let \code{N} be an object of class \code{"Norm"} with parameters \code{mean=2},
\code{sd=1.3}, and let \code{P} be an object of class \code{"Pois"} with parameter
\code{lambda=1.2}. Assigning to \code{Z} the expression \code{2*N+3+P}, a new
distribution object is generated ---of class \code{"AbscontDistribution"} in our
case--- so that, identifying \code{N}, \code{P}, \code{Z} with random variables
distributed according to {\tt N}, {\tt P}, {\tt Z}, we have
${\cal L}({\tt Z})={\cal L}(2*{\tt N}+3+{\tt P})$; writing \code{p(Z)(0.4)} we get
$P(Z\leq 0.4)$, \code{q(Z)(0.3)} gives the $30\%$-quantile of {\tt Z}, and with
\code{r(Z)(50)} we generate $50$ pseudo random numbers distributed according to
{\tt Z}, while the \code{plot} command generates the above figure.
\end{small}
% -------------------------------------------------------------------------------
\section{Concept}
% -------------------------------------------------------------------------------
In developing our packages, we had the following principles in mind: We wanted to be
open in our design so that our classes could easily be extended by any volunteer in
the {\sf R} community to provide more complex classes of distributions such as
multivariate distributions, time series distributions, or conditional distributions.
As an exercise, the reader is encouraged to implement extreme value distributions
from the package \pkg{evd}\footnote{a solution to this ``homework'' may be found in
the sources to \pkg{distrEx}, resp.\ from version 2.4 on, in the sources to
\pkg{RobExtremes}}. The largest effort will in fact be the documentation\ldots\\
We also wanted to preserve naming and notation from {\sf R}-\pkg{stats} as far as
possible so that any programmer used to {\tt S} could quickly use our package. Even
more so, as the distributions already implemented in {\sf R} are all well tested and
programmed with skills we lack, we use the existing {\tt r}, {\tt d}, {\tt p}, and
{\tt q}-functions wherever possible, only wrapping them by small code snippets into
our class hierarchy.\\
Third, we wanted to use a suggestive notation for our automatically generated
methods \code{r}, \code{d}, \code{p}, and \code{q}, which we think is now largely
achieved. All this should make intensive use of object orientation in order to be
able to use inheritance and method overloading.
Let us briefly explain why we decided to realize \code{r}, \code{d}, \code{p}, and
\code{q} as part of our class definitions: Doing so, we place ourselves somewhere
between pure object orientation where methods would be {\it slots\/} ---in the
language of the {\tt S4}-concept, confer \cite{Cham:98}--- and the {\tt S4} paradigm
where methods ``live their own life'' apart from the classes, or, to use
\cite{Beng:03}'s terminology, we use COOP\footnote{class-object-orientated
programming, as e.g.\ in {\tt C++}}-style for \code{r}, \code{d}, \code{p}, and
\code{q} methods, and FOOP\footnote{function-object-orientated programming, as in
the {\tt S4}-concept}-style for "normal" methods.\\
The {\tt S4}-paradigm with methods which are not attached to an object but rather
behave differently according to the classes of their arguments is fine if there are
particular user-written methods for only some few general distribution classes like
\code{AbscontDistribution}, as in the case for \code{plot} or \code{"+"} (c.f.\
\cite{K:R:S:04}, Section 2.2). During a typical {\sf R} session with \pkg{distr},
however, there will be a lot of mostly automatically generated objects of our
distribution classes, each with its own \code{r}, \code{d}, \code{p}, and \code{q};
this even applies to intermediate expressions like \code{2*N}, \code{2*N+3} used to
eventually produce \code{Z} in the example in the motivation. Treating \code{r},
\code{d}, \code{p}, and \code{q} as generic functions, we would need to generate new
classes for each expression \code{2*N}, \code{2*N+3}, \code{Z} and, correspondingly,
particular {\tt S4}-methods for \code{r}, \code{d}, \code{p}, and \code{q} for each
of these new classes; apparently, this would produce overly many classes for an
effective inheritance structure. \\
In providing arithmetics for distributions, we have to deviate a little from the
paradigm of {\tt S} as a functional language: For operators like ``$+$'', additional
parameters controlling the precision of the results cannot be handily passed as
arguments. For this purpose we provide global options which may be inspected and
modified by \code{distroptions}, \code{getdistrOption}\footnote{Up to version 0.4-4,
we used a different mechanism to inspect/modify global options of \pkg{distrEx} (see
section~\ref{distrExoptions}); corresponding functions \code{distrExoptions},
\code{getdistrExOption} for package \pkg{distrEx} are available from version 1.9
on.} in complete analogy to \code{options}, \code{getOption}.
%
Finally, our concept as to parameters: Contrary to the standard {\sf R}-functions
like \code{rnorm}, we only permit length $1$ for parameters like \code{mean},
because we see the objects as implementations of univariate random variables, for
which vector-valued parameters make no sense; rather one could gather several
objects with possibly different parameters into a vector/list of distributions. Of
course, the original functions \code{rnorm} etc.\ remain unchanged and still allow
for vector-valued parameters.
Kouros Owzar in an off-list mail raised the point that, in the case of multiple
parameters as for the normal or the $\Gamma$-distribution, it might be useful to be
able to pass these multiple parameters in vectorized form to the generating
function. We, too, think that this is a good idea, but have shifted this question to
the new extension package \pkg{distrMod}, which covers more general treatment of
statistical models, see section~\ref{distrMod}.
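As a brief, hedged illustration of this mechanism (\code{TruncQuantile} is one of
the global options discussed in section~\ref{options}; its default value and the
effect of changing it may vary between package versions), such options are inspected
and modified just like ordinary {\sf R} options:
<<>>=
getdistrOption("TruncQuantile")      ## inspect the current truncation quantile
distroptions(TruncQuantile = 1e-8)   ## tighten it for subsequent computations
getdistrOption("TruncQuantile")
distroptions(TruncQuantile = 1e-5)   ## restore a less strict value again
@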
% ------------------------------------------------------------------------------- \section{Organization in classes} % ------------------------------------------------------------------------------- Loosely speaking we have three large groups of classes: distribution classes (in \pkg{distr}), simulation classes (in \pkg{distrSim}) and an evaluation class (in \pkg{distrTEst}), where the latter two are to be considered only as tools which allow a unified treatment of simulations and evaluation of statistical estimation (perhaps also tests and predictions later) under varying simulation situations. Additionally, package \pkg{distrEx} provides classes for discrete multivariate distributions and for factorized, conditional distributions, as well as a bundle of functionals and distances (see below). % ------------------------------------------------------------------------------- \subsection{Distribution classes} % ------------------------------------------------------------------------------- The purpose of the classes derived from the class \code{Distribution} is to implement the concept of a r.v./distribution as such in {\sf R}.\\ All classes derived from \code{Distribution} have a slot \code{param} for a parameter, a slot \code{img} for the range and the constitutive slots \code{r}, \code{d}, \code{p}, and \code{q}.\\ From version 1.9 on, up to arguments referring to a parameter of the distribution (like \code{mean} for the normal distribution), these function slots have the same arguments as those of package \pkg{stats}, i.e.; for a distribution object \code{X} we may call these functions as \begin{itemize} \item \code{r(X)(n)} $\qquad$ ---except for objects of class \code{Hyper}, where there is a slot \code{n} already, so here the argument name to \code{r} is \code{nn}. \item \code{d(X)(x, log = FALSE)} \item \code{p(X)(q, lower.tail = TRUE, log.p = FALSE)} \item \code{q(X)(p, lower.tail = TRUE, log.p = FALSE)} \end{itemize} For the arguments of these function slots see e.g.\ \code{rnorm} from package \pkg{stats}. Note that, as usual, slots \code{d}, \code{p}, and \code{q} are vectorized in their first argument, but are not on the subsequent ones. The idea is to gain higher precision for the upper tails or when multiplying probabilities. % ------------------------------------------------------------------------------- \subsubsection{Subclasses} % ------------------------------------------------------------------------------- To begin with, we consider univariate distributions giving {\tt S4}-class \code{UnivariateDistribution}, and as typical subclasses, we introduce classes for absolutely continuous and discrete distributions ---\code{AbscontDistribution} and \code{DiscreteDistribution}.\\ The former, from version 1.9 on, has a slot \code{gaps} of class \code{OptionalMatrix}, i.e.; an object which may either be \code{NULL} or a \code{matrix}. This slot, if non-\code{NULL}, contains left and right endpoints of intervals where the density of the object is $0$. This slot may be inspected by the accessor \code{gaps()} and modified by a corresponding replacement method. 
It may also be filled automatically by \code{setgaps(object, exactq = 6, ngrid = 50000)}, where upon evaluation of the \code{d}-slot on a grid of length \code{ngrid}, all regions in the range\footnote{more precisely: between lower and upper \code{TruncQuantile}; \code{TruncQuantile} is a global option of \pkg{distr} described in section~\ref{options}} of the distribution where the density is smaller than $10^{\scriptscriptstyle - {\rm exactq}}$ are set to gaps. Internally, we have helper functions \code{.consolidategaps} to merge adjacent intervals and \code{mergegaps} to merge \code{slots} of different objects. \\ For saved objects from earlier versions, we provide the functions \code{isOldVersion} and \linebreak[4]\code{conv2NewVersion} to check whether the object was generated by an older version of this package and to convert such an object to the new format, respectively.\\ Class \code{DiscreteDistribution} has a slot \code{support}, a vector containing the support of the distribution, which is truncated to the lower/upper \code{TruncQuantile} in case of an infinite support. \code{TruncQuantile} is a global option of \pkg{distr} described in section~{\ref{options}}. For version 2.8.0 on, it has an additional internal slot \code{.finSupport} which is a logical of length 2. The first entry says if the left endpoint of the distribution is finite, the second if the right endpoint is finite. Also from version 1.9 on, class \code{DiscreteDistribution} has a subclass \code{LatticeDistribution} for supports consisting of\footnote{or at least if filled with points carrying no mass have a representation as an affine linear lattice} an affine linear lattice of form $p+iw$ for $p\in\R$, $w\in\R$, $w\not=0$ and $i=0,1,\ldots,L$, $L\in\N \cup\infty$. This class gains a slot \code{lattice} of class \code{Lattice} (see below). The purpose of this class is mainly its use in DFT/FFT methods for convolution. Slot \code{lattice} may be inspected by the usual accessor function \code{lattice()}. As by inheritance, all subclasses of \code{LatticeDistribution} which prior to version 1.9 were direct subclasses of \code{DiscreteDistribution} gain a slot \code{lattice}, too, we provide again \code{isOldVersion} and \code{conv2NewVersion} methods to check whether the object was generated by an older version of this package and to convert such an object to the new format, respectively. Also note that internally, we suppress lattice points from the support where the probability is $0$.\\ Objects of classes \code{LatticeDistribution} resp.\ \code{DiscreteDistribution}, and from version 2.0 on, also \code{AbscontDistribution}, may be generated using the generating functions \code{LatticeDistribution()} resp.\ \code{DiscreteDistribution()} resp.\ \code{AbscontDistribution()}; see also the corresponding help. 
E.g., to produce a discrete distribution with support $(1,5,7,21)$ with corresponding probabilities $(0.1,0.1,0.6,0.2)$ we may write <>= D <- DiscreteDistribution(supp = c(1,5,7,21), prob = c(0.1,0.1,0.6,0.2)) D plot(D) @ %\newline and to generate an absolutely continuos distribution with density proportional to $e^{-|x|^3}$, we write <>= AC <- AbscontDistribution(d = function(x) exp(-abs(x)^3), withStand = TRUE) AC plot(AC) @ %\newline As subclasses of these absolutely continuous and discrete classes, we have implemented all parametric families which already exist in the \pkg{stats} package of {\sf R} in form of {\tt [prefix]} functions ---by just providing wrappers to the original {\sf R}-functions.\\ % Schematically, the inheritance relations as well as the slots of the corresponding classes may be read off from Figure~\ref{fig1c}. Class \code{LatticeDistribution} and slot \code{gaps}, as well as additional classes \code{AffLinAbscontDistribution}, \code{AffLinDiscreteDistribution}, \code{AffLinLatticeDistribution} (c.f.\ section~\ref{afflin}) are still lacking in this graphic so far, however, as well as the classes introduced in version 2.0. \\ \ifpdf \begin{figure}[!ht]\label{fig1} \vspace{2ex} \begin{center} % \includegraphics[viewport=0 0 500 700,width=9cm]{distribution.pdf}% \includegraphics[viewport=130 150 500 750,width=9cm]{distribution.pdf}% \caption{\label{fig1c}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{Distribution} where we do not repeat inherited slots }} \end{center} \vspace{-4ex} \end{figure} \else \begin{figure}[htb]\label{fig1} \begin{center} \includegraphics[viewport=130 150 500 730,width=7.5cm]{distribution.ps}% \caption{\label{fig1c}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{Distribution} where we do not repeat inherited slots }} \end{center} \vspace{-1ex} \end{figure} \fi The most powerful use of our package probably consists in operations to automatically generate new slots \code{r}, \code{d}, \code{p}, and \code{q} ---induced by mathematical transformations. This is discussed in some detail in subsection~{\ref{methods}}. \subsubsection{Classes for Mixture Distributions} \paragraph{Lists of distributions} As a first step, we allow distributions to be gathered in lists, giving classes \code{DistrList} and \code{UnivarDistrList}, where in case of the latter, all elements must be univariate distributions. For these, the usual indexing operations with \code{[[.]]} are available. As we will use these lists to construct more general mixture distributions in some subsequent versions, we have moved these routines to package \pkg{distr} from version 1.9 on. \paragraph{Mixing distributions} To be able to work with distributions which are neither purely absolutely continuous nor purely discrete, like e.g.\ the distribution of $\min(X,1)$ for $X\sim{\cal N}(0,1)$, from package version 2.0 on, we support mixtures of distributions. These are realized as subclasses of class \code{UnivariateDistribution}. To begin with, we introduce a class \code{UnivarMixingDistribution} as subclass of class \code{UnivariateDistribution} which additionally has two slots \code{MixCoeff} and \code{MixDistr}. 
While the former is a numeric vector taking up the mixture coefficients of the
distribution, the latter is an object of class \code{UnivarDistrList} as described
above, taking up the distributions of the mixture components; as usual, these slots
have their respective accessor and replacement functions. Usually, this mixing
distribution will neither have a Lebesgue density nor be purely discrete with a
counting density, so slot \code{d} will, as a rule, be empty.
Objects of this class may be generated by the generating function
\code{UnivarMixingDistribution()}, see also the corresponding help. In addition
there is the function \code{flat.mix} to simplify such an object, converting it to
an object of class \code{UnivarLebDecDistribution}; confer subsection~\ref{flat}.
Note that these mixing distributions may be recursive, i.e.\ components of slot
\code{MixDistr} may again be of class \code{UnivarMixingDistribution}.
<<>>=
library(distr)
M1 <- UnivarMixingDistribution(Norm(), Pois(lambda=1), Norm(),
                               withSimplify = FALSE)
M2 <- UnivarMixingDistribution(M1, Norm(), M1, Norm(),
                               withSimplify = FALSE)
M2
@
\paragraph{Lebesgue Decomposed distributions}
As seen in the above example of $\min(X,1)$, classes \code{DiscreteDistribution}
and \code{AbscontDistribution} are not closed under arithmetic operations. To have
such a closure, from version 2.0 on, we introduce class
\code{UnivarLebDecDistribution}, which realizes a Lebesgue decomposition of a
univariate distribution into a discrete and an absolutely continuous distribution.
Of course, we still cannot cover distributions having a non-trivial continuous but
not absolutely continuous part like the Cantor distribution, but class
\code{UnivarLebDecDistribution} provides a sufficiently general compromise.
Class \code{UnivarLebDecDistribution} is a subclass of class
\code{UnivarMixingDistribution}, where in addition we assume that both slots
\code{MixCoeff} and \code{MixDistr} are of length 2, and that the first component of
slot \code{MixDistr} is of class \code{AbscontDistribution} while the second is of
class \code{DiscreteDistribution}. For this class there are particular accessors
\code{acWeight}, \code{discreteWeight} for the respective weights and
\code{acPart}, \code{discretePart} for the respective distributions. Again there is
a generating function \code{UnivarLebDecDistribution()}. In addition there is the
function \code{flat.LCD} to simplify such an object, converting it to an object of
class \code{UnivarLebDecDistribution}; confer subsection~\ref{flat}.%
Classes \code{AbscontDistribution}, \code{DiscreteDistribution} and
\code{UnivarLebDecDistribution} are grouped to a virtual class (more specifically a
class union) \code{AcDcLcDistribution}.
\paragraph{Compound distributions}
From version 2.1 on, we also support compound distributions, i.e.\ distributions $D$
of the form $D={\cal L}(\sum_{i=1}^N X_i)$,
$X_i\stackrel{\rm\scriptsize i.i.d.}{\sim} F$, a distribution on $\R$, and,
independent of the $X_i$, $N\sim Q$ a distribution on $\N_0$. These distributions
are implemented as class \code{CompoundDistribution}, which is a subclass of class
\code{UnivarMixingDistribution}; in addition this class has two slots,
\code{NumbOfSummandsDistr}, the distribution $Q$ of the number of summands (or
frequency distribution), and \code{SummandsDistr}, the distribution $F$ of the
summands. Correspondingly, in package \pkg{distrEx} there are specialized
expectation and variance methods.
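As a short, hedged sketch (we assume here that the generating function
\code{CompoundDistribution()} takes the frequency distribution $Q$ and the summands'
distribution $F$ as its two arguments in this order; see the corresponding help for
the authoritative signature), a compound Poisson distribution with standard normal
summands could be set up as follows:
<<>>=
CP <- CompoundDistribution(Pois(lambda = 2), Norm())
CP
r(CP)(5)   ## five pseudo random numbers from the compound distribution
@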
\subsubsection{Classes for multivariate distributions and for conditional distributions} In \pkg{distrEx}, %besides providing particular distribution classes %for the Gumbel distribution (see \code{?Gumbel}) and the Pareto %distribution (see \code{?Pareto}), -> moved to RobExtremes we provide the following classes for handling multivariate distributions: \paragraph{Multivariate distribution classes} Multivariate distributions are much more complicated than univariate ones, which is why but a few exceptional ones have already been implemented to R in packages like \pkg{multnorm}. In particular it is not so clear what a slot \code{q} should mean and, in higher dimensions slot \code{p}, and possibly also slot \code{d} may become awkward. So, for multivariate distributions, realized as class \code{MultivariateDistribution}, we only insist on slot \code{r}, while the other functional slots may be left void. The easiest case is the case of a discrete multivariate distribution with finite support which is implemented as class \code{DiscreteMVDistribution}. \paragraph{Conditional distribution classes} Also arising in multivariate settings only are conditional distributions. In our approach, we realize factorized, conditional distributions where the (factorized) condition is in fact treated as an additional parameter to the distribution. The condition is realized as an object of class \code{Condition}, which is a slot of corresponding classes \code{UnivariateCondDistribution}. This latter is the mother class to classes \code{AbscontCondDistribution} and \code{DiscreteCondDistribution}. The most important application of these classes so far is regression, where the distribution of the observation given the covariates is just realized as a \code{UnivariateCondDistribution}. \subsubsection{Parameter classes} % As most distributions come with a parameter which often is of own interest, we endow the corresponding slots of a distribution class with an own parameter class, which allows for some checking like ``Is the parameter \code{lambda} of an exponential distribution non-negative?'', ``Is the parameter \code{size} of a binomial a positive integer?''\\ Consequently, we have a method \code{liesIn} that may answer such questions by a \code{TRUE}/\code{FALSE} statement. Schematically, the inheritance relations of class \code{Parameter} as well as the slots of the corresponding \mbox{(sub-)}classes may be read off in Figure~\ref{fig4c} where we do not repeat inherited slots. % \ifpdf \begin{figure}[!ht]\label{fig4} \begin{center} % \includegraphics[viewport=30 10 540 390,height=11cm,width=12cm]{parameter.pdf} % [viewport=30 10 540 390,width=12.cm] \includegraphics[viewport=30 80 400 690,height=15cm,width=12cm]{parameter.pdf} %[viewport=30 10 540 390,width=12.cm] \caption{\label{fig4c}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{Parameter} }} \end{center} \end{figure} \else \begin{figure}[htb]\label{fig4} \begin{center} \includegraphics[viewport=80 120 340 600,height=12.8cm,width=9.0cm, angle=-90]{parameter.ps}% \caption{\label{fig4c}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{Parameter} }} \end{center} \end{figure} \fi % The most important set to be used as parameter domain/sample space (\code{rSpace}) will be an Euclidean space. So \code{rSpace} and \code{EuclideanSpace} are also implemented as classes, the structure of which may be read off in Figure~\ref{fig5c}. 
\begin{figure}[htb]\label{fig5} \begin{center} \ifpdf \includegraphics[%viewport=0 30 600 600, width=3.9cm]{rspace.pdf} \else \includegraphics[width=3.7cm]{rspace.ps}% \fi \caption{\label{fig5c}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{rSpace} }} \end{center} \end{figure} From version 1.9 on, we also have a subclass \code{Lattice}, which is still lacking in the preceding figure. It has slots \code{pivot} (of class "numeric"), \code{width} (of class "numeric" but tested against ``{\tt ==0}'') and \code{Length} (of class "numeric" but tested to be an integer ``{\tt >0}'' or {\tt Inf}). All slots may be inspected/modified by the usual accessor/replacement functions. \subsection{Simulation classes} From version 1.6 on, the classes and methods of this subsection are available in package \pkg{distrSim}. The aim of simulation classes is to gather all relevant information about a simulation in a correspondingly designed class. To this end we introduce the class \code{Dataclass} that serves as a common mother class for both "real" and simulated data. % As derived classes we then have a simulation class where we also gather all information needed to reconstruct any particular simulation.\\ % %\newline0??????\\ From version 1.8 of this package on, we have changed the format how data / simulations are stored: In order to be able to cope with multivariate, regression and (later) time series distributions, we have switched to the common array format % {\tt samplesize x obsDim x runs} where {\tt obsDim} is the dimension of the observations. % For saved objects from earlier versions, we provide the functions \code{isOldVersion} and \code{conv2NewVersion} to check whether the object was generated by an older version of this package and to convert such an object to the new format, respectively. For objects generated from version 1.8 on, you get the package version of package~\pkg{distrSim}, under which they have been generated by a call to \code{getVersion()}. \\ % %??????1\\ Finally, coming from robust statistics we also consider situations where the majority of the data stems from an ideal situation/distribution whereas a minority comes from a contaminating source. To be able to identify ideal and contaminating observations, we also store this information in an indicator variable.\\ % As the actual values of the simulations only play a secondary role, and as the number of simulated variables can become very large, but still easily reproducible, it is not worth storing all simulated observations but rather only the information needed to reproduce the simulation. This can be done by \code{savedata}.\\ Schematically, the inheritance relations of class \code{Dataclass} as well as the slots of the corresponding \mbox{(sub-)}classes may be read off in Figure~\ref{fig2c} where we do not repeat inherited slots. 
\begin{figure}[htb]\label{fig2}
\begin{center}
\ifpdf
\includegraphics[viewport=120 30 470 250,width=8.5cm]{dataclass.pdf}
\else
\includegraphics[width=12.5cm]{dataclass.ps}%
\fi
\caption{\label{fig2c}{\footnotesize Inheritance relations and slots of the corresponding
\mbox{(sub-)}classes for \code{Dataclass} }}
\end{center}
\end{figure}
%\newline0??????\\
Also, analogously to package %s
\pkg{distr}, %and \pkg{distrEx},
global options for the output by methods \code{plot} and \code{summary} are
controlled by \code{distrSimoptions()} and \code{getdistrSimoptions()}. \smallskip\\
%??????1\\
\subsection{Evaluation class}
From version 1.6 on, the class and methods of this subsection are available in
package \pkg{distrTEst}. \\
When investigating properties of a new procedure (e.g.\ an estimator) by means of
simulations, one typically evaluates this procedure on a large set of simulation
runs and gets a result for each run. These results are typically not available
within seconds, so it is worth storing them. To organize all relevant information
about these results, we introduce a class \code{Evaluation}, the slots of which are
filled by method \code{evaluate} ---see subsection~\ref{evaluate}.
Schematically, the slots of this class may be read off in Figure~\ref{fig3c}.
\begin{figure}[htb]\label{fig3}
\begin{center}
\ifpdf
\includegraphics[viewport=60 20 420 200,width=5cm]{evaluation.pdf}
\else
\includegraphics[width=5cm]{evaluation.ps}%
\fi
\caption{\label{fig3c}{\footnotesize Slots of class \code{Evaluation}}}
\end{center}
\end{figure}
%\newline0??????\\
A corresponding \code{savedata} method saves the object of class \code{Evaluation}
in two files in the {\sf R}-working directory: one, using the filename
\code{<filename>}, also stores the results; the other one, designed to be ``human
readable'', comes as a comment file with filename \code{<filename>.comment} and
stores only the remaining information. The filename can be specified in the optional
argument \code{fileN} to \code{savedata}; by default it is concatenated from the
\code{filename} slot of the \code{Dataclass} object and \code{<estimatorName>},
which you may either pass as argument \code{estimatorName} or which by default is
taken as the {\sf R}-name of the corresponding {\sf R}-function specified in slot
\code{estimator}.
From version 1.8 on, slot \code{result} in class \code{Evaluation} is of class
\code{DataframeorNULL}, i.e., it may be either a data frame or {\tt NULL}, and slot
\code{call.ev} in class \code{Evaluation} is of class \code{CallorNULL}, i.e., it
may be either a call or {\tt NULL}.
Also, we want to gather \code{Evaluation} objects in a particular data structure
\code{EvaluationList} (see below), so we have to be able to check whether all data
sets in the gathered objects coincide. For this purpose, from this version on, class
\code{Evaluation} has an additional slot \code{Data} of class \code{Dataclass}. In
order not to burden the objects of this class too heavily with uninformative
simulated data, in the case of a slot \code{Data} of one of the simulation-type
subclasses of \code{Dataclass}, this \code{Data} itself has an empty
\code{Data}-slot.\\
\subsection{EvaluationList class}
The class and methods of this subsection are available in package \pkg{distrTEst}. \\
In order to compare different procedures / estimators for the same problem, it is
natural to gather several \code{Evaluation} objects with results of the same range
(e.g.\ a parameter space) generated on the same data, i.e., on the same
\code{Dataclass} object.
To this end, from version 1.8 on, we have introduced class \code{EvaluationList}. Schematically, the slots of this class may be read off in Figure~\ref{fig3c1}. \begin{figure}[htb]\label{fig3-1} \begin{center} \ifpdf \includegraphics[viewport=0 0 436 185,width=5.5cm]{EvaluationList.pdf}% \else \includegraphics[viewport=0 0 436 185,width=5.5cm]{EvaluationList.ps}% \fi %\parbox{10cm}{TO BE DONE} \caption{\label{fig3c1}{\footnotesize Slots of class \code{EvaluationList}}} \end{center} \end{figure} The common \code{Data} slot of the \code{Evaluation} objects in an \code{EvaluationList} object may be accessed by the accessor method \code{Data}. %??????1\\ % % \section{Methods}\label{methods} % \subsection{Arithmetics} We have made available quite general arithmetical operations to our distribution objects, generating new image distributions automatically. In this context some comments are due as to the interpretation of corresponding arithmetic expressions of distribution objects: \begin{description} \item[{\Large \sc Caveat:\/}] {\bf These arithmetics operate on the corresponding r.v.'s and {\bf not} on the distributions.} \end{description} (For the latter, they only would make sense in restricted cases like convex combinations).\\ Martin M\"achler pointed out that this might be confusing. So, this warning is also issued on attaching package \pkg{distr}, and, by default, again whenever a \code{Distribution} object, produced by such arithmetics is shown or printed; this also applies to the last line in <>= A1 <- Norm(); A2 <- Unif() A1 + A2 @ \begin{verbatim} Warning message: arithmetics on distributions are understood as operations on r.v.'s see 'distrARITH()'; for switching off this warning see '?distroptions' in: print(object) \end{verbatim} This behaviour will soon be annoying so you may switch it off setting the global option \code{WarningArith} to \code{FALSE} (see section~\ref{options}).\medskip\\ % Function \code{distrARITH()} displays the following comment \begin{footnotesize} \begin{quote} \begin{verbatim} ###################################################################### # On arithmetics operating on distributions in package "distr" ###################################################################### Attention: Special caution is due in the followin issues %-------------------------------------------------------------------- % Interpretation of arithmetics %-------------------------------------------------------------------- Arithmetics on distribution objects are understood as operations on corresponding random variables (r.v.'s) and _not_ on distribution functions or densities; e.g. sin( Norm() + 3 * Norm() ) + 2 returns a distribution object representing the distribution of the r.v. sin(X+3*Y)+2 where X and Y are r.v.'s i.i.d. N(0,1). %-------------------------------------------------------------------- % Adjusting accuracy %-------------------------------------------------------------------- Binary operators like "+", "-" would loose their elegant calling e1 + e2 if they had to be called with an extra argument controlling their accuracy. Therefore, this accuracy is controlled by global options. 
These options are inspected and set by
distroptions(), getdistrOption(), see ?distroptions

%--------------------------------------------------------------------
% Multiple instances in expressions and independence
%--------------------------------------------------------------------

Special attention has to be paid to arithmetic expressions of
distributions involving multiple instances of the same symbol:

/-> All arising instances of distribution objects in arithmetic
    expressions are assumed stochastically independent. <-/

As a consequence, whenever in an expression, the same symbol for an
object occurs more than once, every instance means a new independent
distribution.

So for a distribution object X, the expressions X+X and 2*X are _not_
equivalent. The first means the convolution of distribution X with
distribution X, i.e. the distribution of the r.v. X1 + X2, where X1 and
X2 are identically distributed according to X. In contrast to this, the
second expression means the distribution of the r.v. 2 X1 = X1 + X1,
where again X1 is distributed according to X. Hence always use 2*X, when
you want to realize the second case. Similar caution is due for X^2 and
X*X and so on.

%--------------------------------------------------------------------
% Simulation based results varying from call to call
%--------------------------------------------------------------------

At several instances (in particular for non-monotone functions from
group Math like sin(), cos()) new distributions are generated by means
of RtoDPQ, RtoDPQ.d, RtoDPQ.LC. In these functions, slots d, p, q are
filled by simulating a large number of random variables, hence they are
stochastic estimates. So don't be surprised if they will change from
call to call.
\end{verbatim}
\end{quote}
\end{footnotesize}
%
\subsection{Affine linear transformations}\label{afflin}
%
We have overloaded the operators \code{"+"}, \code{"-"}, \code{"*"}, \code{"/"}
such that affine linear transformations which involve only single univariate
r.v.'s are available; i.e.\ expressions like \code{Y=(3*X+5)/4} are permitted for an
object \code{X} of class \code{AbscontDistribution} or \code{DiscreteDistribution}
(or some subclass), giving again an object \code{Y} of class
\code{AbscontDistribution} or \linebreak[4]\code{DiscreteDistribution} (in general).
Here the corresponding transformations of the \code{d}, \code{p}, and
\code{q}-functions are done analytically.\\
%
From version 1.9 on, we use subclasses \code{AffLinAbscontDistribution},
\code{AffLinDiscreteDistribution}, \code{AffLinLatticeDistribution} as classes of
the return values to enhance the accuracy of functionals like \code{E}, \code{var},
etc.\ in package \pkg{distrEx}. These classes, in addition to their counterparts
without prefix ``\code{AffLin}'', have slots \code{a}, \code{b}, and \code{X0}, to
capture the fact that an object of this class is distributed as \code{a * X0 + b}.
Also, we introduce a class union \code{AffLinDistribution} of classes
\code{AffLinAbscontDistribution} and \linebreak[4]\code{AffLinDiscreteDistribution}.
Consequently, the result \code{Y} of \code{Y <- a1 * X + b1} for an object \code{X}
of (a subclass of) class \code{AffLinDiscreteDistribution} (if \code{a1 != 0}) is of
the same class as \code{X} but with slots \code{Y@a = a1 * X@a},
\code{Y@b = b1 + a1 * X@b}, \code{Y@X0 = X@X0}. In version 2.0, the same principle
has been applied to introduce class \code{AffLinUnivarLebDecDistribution}.
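As a small illustration (note that for distribution classes like \code{Norm}, which
have their own exact convolution and affine-transformation methods, the result will
rather be an object of that special class), starting from a plain
\code{DiscreteDistribution} we expect an \code{AffLin}-type return value:
<<>>=
X <- DiscreteDistribution(supp = c(1,5,7,21), prob = c(0.1,0.1,0.6,0.2))
Y <- (3*X + 5)/4
class(Y)   ## should be (a subclass of) AffLinDiscreteDistribution
Y@a; Y@b   ## slots capturing that Y is distributed as a * X0 + b
@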
All \code{AffLin}-xxx distribution classes are grouped to a virtual class (more
specifically a class union) \code{AffLinDistribution}.
%
\subsection{Decompositions, Flattening and Other Simplifications}\label{flat}
%
\paragraph{Decompositions: }
One of the issues when programming the distribution of the product of independent
random variables is that we have to treat the positive and negative parts (and, if
nontrivial, the point mass at $0$) separately. To this end, from version 2.0 on,
there are methods \code{decomposePM} to decompose a discrete, an absolutely
continuous or a Lebesgue decomposed distribution into its respective parts.
%
<<>>=
decomposePM(Norm())
decomposePM(Binom(2,0.3)-Binom(5,.4))
decomposePM(UnivarLebDecDistribution(Norm(),Binom(2,0.3)-Binom(5,.4),
            acWeight = 0.3))
@
\paragraph{Simplification by flattening: }
On the other hand, concatenating mathematical operations would easily yield quite
complicated structures. A first thing to do is to check whether some components
carry mass (approximately) $0$. \code{simplifyD} uses this to cancel out such
components and, if possible, to return simpler types; see also the help to this
function. Also, sometimes one would like to collapse a whole list of distributions
(as in the \code{MixDistr} of a \code{UnivarMixingDistribution} object) into a
simpler \code{UnivarLebDecDistribution}-class form. This is what is done in the
functions \code{flat.mix} and \code{flat.LCD}.
<<>>=
D1 <- Norm()
D2 <- Pois(1)
D3 <- Binom(1,.4)
D4 <- UnivarMixingDistribution(D1,D2,D3, mixCoeff = c(0.4,0.5,0.1),
                               withSimplify = FALSE)
D <- UnivarMixingDistribution(D1,D4,D1,D2, mixCoeff = c(0.4,0.3,0.1,0.2),
                              withSimplify = FALSE)
D
D0 <- flat.mix(D)
D0
@
Many arithmetic operations described in the subsequent sections do this
simplification on their return value, according to the global option
\code{SimplifyD}.
\paragraph{Simplification by collapsing: }
Dealing with discrete distributions, several arithmetical/mathematical operations
tend to create new, quite unequally spaced supports (mostly by joining the supports
of the operands), often accumulating support points somewhere. In order to overcome
this, from version 2.1 on, following a proposal by
\href{mailto:jacobvanetten@yahoo.com}{Jacob van Etten}, whenever support points get
closer to each other than prescribed in global option \code{DistrResolution} (see
also section~\ref{options}), and if in addition global option \code{DistrCollapse}
is \code{TRUE}, we collapse these support points, using the median of the collapsed
points as the new support point, to which we attribute the cumulated probability
mass. If in addition global option \code{DistrCollapse.Unique.Warn} is \code{TRUE},
we issue a warning on collapsing occasions.
\subsection[The group math of unary mathematical operations]{The group \code{math}
of unary mathematical operations}
%
Also the group \code{math} of unary mathematical operations is available for
distribution classes; so expressions like \code{exp(sin(3*X+5)/4)} are permitted.
%
The corresponding \code{r} method consists in simply performing the transformation
to the simulated values of \code{X}.
The corresponding (default) \code{d}, \code{p} and \code{q}-functions are obtained
by simulation, using the technique described in the following subsection.\\
By means of \code{substitute}, the bodies of the \code{r}, \code{d}, \code{p},
\code{q}-slots of distributions show the parameter values with which they were
generated; in particular, convolutions and applications of the group \code{math}
may be traced in the \code{r}-slot of a distribution object, compare\newline
\code{r(sin(Norm()) + cos(Unif() * 3 + 2))}.
Initially, it might be irritating that the same ``arithmetic'' expression evaluated
twice in a row gives two different results, compare
<<>>=
A1 <- Norm(); A2 <- Unif()
d(sin(A1 + A2))(0.1)
d(sin(A1 + A2))(0.1)
sin(A1 + A2)
@
This is due to the fact that all slots are filled starting from simulations. To
explain this, a warning is issued by default whenever a \code{Distribution} object
filled by such simulations is shown or printed; this also applies to the last line
in the preceding code snippet. This behaviour may again be switched off by setting
the global option \code{WarningSim} to \code{FALSE} (see section~\ref{options}).\\
As they are frequently needed, from version 1.9 on, math operations \code{abs()},
\code{exp()}, and ---if an {\sf R}-version $\ge$ {\tt 2.6.0} is used--- also
\code{log()} are implemented in an analytically exact form, i.e., with exact
expressions for slots \code{d}, \code{p}, and \code{q}.
%
\subsection{Construction of \code{d}, \code{p}, and \code{q} from \code{r}}
%
In order to facilitate automatic generation of new distributions, in particular
those arising as image distributions under transformations of correspondingly
distributed random variables, we provide ad hoc methods that should be overloaded
by more exact ones wherever possible. As, at least in principle, each of these
slots is sufficient for the reconstruction of the other ones, we use the following
strategy: \\
\begin{center}
\begin{tabular}{llll|p{10cm}}
\Code{d} & \Code{p} & \Code{q} & \Code{r} & reconstruction\\
\hline
$+$&$+$&$+$&$+$&no reconstruction necessary\\
$+$&$+$&$+$&$-$&\Code{r} as \Code{q(X)(runif(n))}\\
$+$&$+$&$-$&$+$&\Code{q} by numerical inversion from \Code{p}\\
$+$&$+$&$-$&$-$&\Code{q} again from \Code{p} and \Code{r} again from slot \Code{q}\\
$+$&$-$&$+$&$+$&\Code{p} by numerical integration from \Code{d}\\
$+$&$-$&$+$&$-$&\Code{p} from \Code{d}, and \Code{r} from \Code{q}\\
$+$&$-$&$-$&$+$&\Code{p} from \Code{d}, and \Code{q} from \Code{p}\\
$+$&$-$&$-$&$-$&\Code{p} from \Code{d}, \Code{q} from \Code{p} and \Code{r} from \Code{q}\\
$-$&$+$&$+$&$+$&\Code{d} by numerical differentiation (with \Code{D1ss} from package \pkg{sfsmisc}) from \Code{p}\\
$-$&$+$&$+$&$-$&\Code{d} from \Code{p}, \Code{r} from \Code{q}\\
$-$&$+$&$-$&$+$&\Code{d}, \Code{q} from \Code{p}\\
$-$&$+$&$-$&$-$&\Code{d}, \Code{q} from \Code{p}, \Code{r} from \Code{q}\\
$-$&$-$&$+$&$+$&\Code{p} by numerical inversion from \Code{q}, \Code{d} from \Code{p}\\
$-$&$-$&$+$&$-$&\Code{p}, \Code{r} from \Code{q}, \Code{d} from \Code{p}\\
$-$&$-$&$-$&$+$&use \Code{RtoDPQ}\\
$-$&$-$&$-$&$-$&not allowed\\
\end{tabular}\\
\end{center}
More specifically, by means of the function \code{RtoDPQ} we first generate
$10^{\footnotesize\tt RtoDPQ.e}$ random numbers, where \code{RtoDPQ.e} is a global
option of this package and is discussed in section~{\ref{options}}.
%
A density estimator is evaluated along this sample, the distribution function is
estimated by the empirical c.d.f.
and, finally, the quantile function is produced by numerical inversion. Of course the result is rather crude as it relies on the law of large numbers only, but this way all transformations within the group \code{math} become available. If the input of the transformation is of class \code{UnivarLebDecDistribution}, \code{RtoDPQ} is replaced by \code{RtoDPQ.LC}. In this case, replicated values are taken as belonging to the discrete part, for which the distribution is generated according to the corresponding frequencies with the generating function \code{DiscreteDistribution()}. With the remaining, non replicated values, the absolutely continuous part is reconstructed just as with \code{RtoDPQ}. Where laws under transformations can easily be computed exactly ---as for affine linear transformations--- we replace this procedure by the exactly transformed \code{d}, \code{p}, \code{q}-methods. % \subsection{Convolution} % A convolution method for two independent r.v.'s is implemented by means of explicit calculations for discrete summands, and by means of DFT/FFT\footnote{Details to be found in \cite{K:R:S:04}} if one of the summands is absolutely continuous or (from version 1.9 on:) both are lattice distributed with a common lattice as support. This method automatically generates the law of the sum of two independent variables/distributions $X$ and $Y$ of any univariate distributions ---or in {\tt S4}-jargon: the addition operator \code{"+"} is overloaded for two objects of class \code{UnivariateDistribution} and corresponding subclasses. % \subsection{Further Binary Operators} % Having implemented a class for Lebesgue decomposed distributions, we have been able to realize further binary operators, in particular we have exact analytical constructions for multiplication, division, exponentiation: <>= A1 <- Norm(); A2 <- Unif() A1A2 <- A1*A2 plot(A1A2) @ <>= A12 <- 1/(A2 + .3) plot(A12) @ <>= B <- Binom(5,.2)+1 A1B <- A1^B plot(A1B, xlim=c(-3,3)) @ <>= plot(1.2^A1) @ <>= plot(B^A1) @ % \subsection{Truncation, Pairwise Minimum/Maximum, Huberization}\label{TruncMin} % Up to version 2.0, we have had truncation, Huberization and minimum and maximum of random variables as illustrating demos; in particular the last three could not be realized in a completely satisfactory manour, as Lebesgue decomposed distributions had not been available before. Now these illustrations have moved into the package itself: <>= H <- Huberize(Norm(),lower=-1,upper=2) plot(H) @ <>= T <- Truncate(Norm(),lower=-1,upper=2) plot(T) @ <>= M1 <- Maximum(Unif(0,1), Minimum(Unif(0,1), Unif(0,1))) plot(M1) @ <>= M2 <- Minimum(Exp(4),4) plot(M2) @ <>= M3 <- Minimum(Norm(2,2), Pois(3)) plot(M3) @ % \paragraph{Enhanced accuracy by using log scales:} Following an idea proposed by Peter Dalgaard on {\tt r-help}, confer \href{https://stat.ethz.ch/pipermail/r-help/2008-September/174295.html}% {\tt\footnotesize https://stat.ethz.ch/pipermail/r-help/2008-September/174295.html} we use the \code{log.p} argument to be able to simulate from the far out tails. To this end, from version 2.1, our distribution classes gain internal slots \code{.lowerExact} and \code{.logExact} which control whether the corresponding slot functions \code{p} and \code{q} use particular code for computing on the log-scale respectively whether they use explicit representations for upper quantiles (instead of computing $1-{\rm lowerQuantile}$). 
If a corresponding distribution object has slots \code{.lowerExact} and \code{.logExact} both
\code{TRUE}, we may use this to produce accurate simulations, especially for correspondingly
truncated distributions:
<>=
N <- Norm()
TN <- Truncate(N, 20,22)
r(TN)(20) ## simulation accurate although :
p(N)(20, lower.tail = FALSE) ## prob that N>=20
@
\subsection{Additional helper functions}
%
From version 1.9 on, there are methods \code{p.l} and \code{q.r} available for
\code{DiscreteDistribution} objects for the left-continuous variant of the cdf, i.e.;
$t\mapsto {\rm p.l}(t)=P(X<t)$, respectively the right-continuous variant of the quantile function:
<>=
B <- Binom(5,0.5)
p(B)(3)
p.l(B)(3)
q(B)(.5)
q.r(B)(0.5)
@
Again from version 2.1 on, class \code{DiscreteDistribution} has a helper method \code{prob} which
returns the vector of probabilities of the support points. More precisely, the return value is a
numeric vector named by the values of the support points. This method is also available for objects
of class \code{UnivarLebDecDistribution}, where it returns a two-row matrix whose column names are
the values of the support points; the first row, named \code{"cond"}, contains the probabilities of
the discrete part (summing up to $1$), while the second row, named \code{"abs"}, contains the
probabilities of the discrete part multiplied by \code{discreteWeight}; hence these values are the
absolute probabilities of the support points.
Again for objects of class \code{UnivarLebDecDistribution}, we have methods \code{p.ac},
\code{d.ac}, \code{p.discrete}, \code{d.discrete} to give the density/probability and the cumulative
distribution function of the discrete and absolutely continuous ({\tt ac}) part of the distribution.
All these methods have an extra argument \code{CondOrAbs} with default value \code{"cond"}, which,
if it does not partially match \code{"abs"}, returns exactly slot \code{p} (resp.\ \code{d}) of the
respective a.c./discrete part of the object; else the return value is weighted by the respective
weight of that part, i.e.\ \code{acWeight}/\code{discreteWeight}.
<>=
B0 <- as(Binom(5,0.5),"DiscreteDistribution")
## coercion necessary:
## otherwise slot "prob" of B0 will be returned
prob(B0)
HN <- Huberize(N, -2,1)
prob(HN)
@
In order to convert arbitrary univariate distributions to class \code{AbscontDistribution}, from
version 2.1 on we have function \code{makeAbscontDistribution}, which takes slot \code{p} and uses
\code{AbscontDistribution()} to generate a corresponding smoothed version; to smear out mass points
on the border of the support, these upper and lower bounds are somewhat enlarged. Note that in the
result, slots \code{p} and \code{q} are not replaced but rather taken unchanged from the argument:
<>=
par(mfrow=c(2,3))
plot(makeAbscontDistribution(Nbinom(5,.5)),mfColRow=FALSE)
plot(makeAbscontDistribution(HN),mfColRow=FALSE)
par(mfrow=c(1,1))
@
Methods \code{getLow}, \code{getUp} available for classes \code{DiscreteDistribution},
\code{AbscontDistribution}, \code{UnivarLebDecDistribution}, and \code{UnivarMixingDistribution}
return ``numerical'' end points of the respective supports: if these distributions have finite end
points, these are returned, else a lower/upper \code{eps}-quantile is returned, where \code{eps}, by
default, is set to the global option \code{TruncQuantile}.
<>=
getLow(Nbinom(5,0.5))
getUp(Nbinom(5,0.5))
getLow(Norm(5,0.5))
getUp(Norm(5,0.5))
@
%
\subsection{Overloaded generic functions}
Methods \code{print}, \code{plot}, \code{show} and \code{summary} have been overloaded for classes
\code{Distribution}, \code{Dataclass}, \code{Simulation}, \code{ContSimulation}, as well as
\code{Evaluation} and \code{EvaluationList} to produce ``pretty'' output. More specifically, there
are also particular \code{show} methods for classes \code{UnivarDistrList},
\code{UnivarMixingDistribution} and \code{UnivarLebDecDistribution}.
\code{print}, \code{plot}, \code{show} and \code{summary} have additional, optional arguments for
plotting subsets of the simulations / results: index vectors for the dimensions, the runs, the
observations, and the evaluations may be passed using arguments \code{obs0}, \code{runs0},
\code{dims0}, \code{eval0}, confer\newline
\code{help("<method>-methods", package="<pkg>")} where \code{<method>} stands for \code{plot},
\code{show}, \code{print}, or \code{summary}, and \code{<pkg>} stands for either \pkg{distrSim} or
\pkg{distrTEst}.
\subsection{Plotting}
\subsubsection[{Plotting for Distribution objects}]{Plotting for \code{Distribution} objects}
For an object of class \code{Distribution}, \code{plot} displays the density/probability function,
the c.d.f.\ and the quantile function of the distribution. Note that all usual parameters of
\code{plot} remain valid. For instance, you may enlarge the axis annotations, and so on.
\code{plot()} can also cope with \code{log}-arguments.
\paragraph{\code{xlim} argument:} More importantly, you may also override the automatically chosen
$x$-region by passing an \code{xlim} argument:
<>=
plot(Cauchy())
@
<>=
plot(Cauchy(),xlim=c(-4,4))
@
From version 2.1, the automatic choice of the $x$-range of \code{plot} for distributions has been
enhanced: just as for expectation (see subsection~\ref{Expect}), we use both a quantile- and a
scale-based approach to get sensible values. Also, argument \code{ylim} can now be matrix-valued to
use different limits for the various panels (\code{d}, \code{p}, \code{q}, or in case of
\code{UnivarLebDecDistribution}, \code{p}, \code{q}, \code{d.ac}, \code{p.ac}, \code{q.ac},
\code{d.discrete}, \code{p.discrete}, \code{q.discrete}).
\paragraph{titles:} Moreover, you may control optional main titles, inner titles and subtitles with
arguments \code{main} / \code{sub} / \code{inner}. To this end, there are preset strings which are
substituted in both expression and character vectors (where in the following \code{x} denotes the
argument with which \code{plot()} was called):
\begin{itemize}
\item[\%A] deparsed argument \code{x}
\item[\%C] class of argument \code{x}
\item[\%P] comma-separated list of parameter values of slot \code{param} of argument \code{x}
\item[\%Q] comma-separated list of parameter values of slot \code{param} of argument \code{x} in
parenthesis unless this list is empty; then \code{""}
\item[\%N] comma-separated {\tt <name> = <value>} list of parameter values of slot \code{param} of
argument \code{x}
\item[\%D] time/date at which plot is/was generated
\end{itemize}
This substitution can be switched off by means of argument \code{withSubst}. As usual, you may
control title sizes with \code{cex.main} / \code{cex.inner} / \code{cex.sub}, and colors with
\code{col} / \code{col.main} / \code{col.inner} / \code{col.sub}. Additionally it may be helpful to
control top and bottom margins with arguments \code{bmar}, \code{tmar}.
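As a minimal sketch of the matrix-valued \code{ylim} mentioned above (the concrete limits are
arbitrary illustration values and assume the three standard panels \code{d}, \code{p}, \code{q} of
an \code{AbscontDistribution} plot, one column of limits per panel):
<>=
## one column of y-limits per panel (d, p, q); the values are arbitrary
plot(Norm(), ylim = cbind(c(0, 0.5), c(0, 1), c(-3, 3)))
@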
\paragraph{step-function features:} We provide different default symbols for unattained [\code{pch.u}] / attained [\code{pch.a}] one-sided limits, which may be overridden by corresponding arguments \code{pch} / \code{pch.a} / \code{pch.u}. For objects of class \code{AbscontDistribution}, you may set the number of grid points used by an \code{ngrid} argument; also the ``quantile''-panel takes care of finite left/right endpoints of support and optionally tries to identify constancy region of the \code{p}-slot. For objects of class \code{DiscreteDistributions}, we use \code{stepfun()} from package \pkg{base} as far as possible and (also for panel ``q'' for \code{AbscontDistributions}) consequently take over its arguments \code{do.points}, \code{verticals}, \code{col.points} / \code{col.vert} / \code{col.hor} and \code{cex.points}. As examples consider the following plots: The standard plot for a discrete distribution is shown in Figure~\ref{plotexPic1}. \begin{figure}[p] \begin{center} <>= plot(Binom(size = 4, prob = 0.3)) @ \caption{\label{plotexPic1}{Standard plot for discrete distributions }} \end{center} \end{figure} Omitting the point symbols at jump points (\code{do.points = FALSE}) and the corresponding vertical lines (\code{verticals = FALSE}) in the \code{p} and \code{q} panels gives Figure~\ref{plotexPic2}. \begin{figure}[p] \begin{center} <>= plot(Binom(size = 4, prob = 0.3), do.points = FALSE, verticals = FALSE) @ \caption{\label{plotexPic2}{Plot for discrete distributions without extra symbols at jump points and vertical lines }} \end{center} \end{figure} Instead we might use a somewhat enlarged (\code{cex.main = 1.6}) main title (\code{main = TRUE}) and omit the panel titles (\code{inner = FALSE}). To this end, we should increase the margin between main title and panels (by \code{tmar = 6}). This is shown in Figure~\ref{plotexPic3}. \begin{figure}[p] \begin{center} <>= plot(Binom(size = 4, prob = 0.3), main = TRUE, inner = FALSE, cex.main = 1.6, tmar = 6) @ \caption{\label{plotexPic3}{Plot for discrete distributions using a main title }} \end{center} \end{figure} Changing point sizes (\code{cex.points}) and line width (\code{lwd}), and using (default) panel titles (set \code{TRUE} by default) gives a somewhat different picture as in Figure~\ref{plotexPic4}. \begin{figure}[p] \begin{center} <>= plot(Binom(size = 4, prob = 0.3), cex.points = 1.2, pch = 20, lwd = 2) @ \caption{\label{plotexPic4}{Plot for discrete distributions using panel titles and changed point sizes }} \end{center} \end{figure} Different colors for different plot elements can be used by arguments \code{col} (general), \code{col.points} (jump points), \code{col.sub} (sub-titles), \code{col.inner} (panel titles). In Figure~\ref{plotexPic5}, this is shown; the figure uses main (\code{main = TRUE}), inner (\code{TRUE} by default) and sub (\code{sub = TRUE}) titles, using the default titles respectively. \begin{figure}[p] \begin{center} <>= B <- Binom(size = 4, prob = 0.3) plot(B, col="red", col.points = "green", main = TRUE, col.main="blue", col.sub = "orange", sub = TRUE, cex.sub = 0.6, col.inner = "brown") @ \caption{\label{plotexPic5}{Plot for discrete distributions using main, panel and sub titles with changed colors }} \end{center} \end{figure} Changing plot size for marking the jump points by \code{cex.points = 1.2} and selecting symbols for left limits (by \code{pch.u = 20}) and right values (by \code{pch.a = 20}) at jump points is exemplified in Figure~\ref{plotexPic6}. 
\begin{figure}[p]
\begin{center}
<>=
plot(Nbinom(size = 4,prob = 0.3), cex.points = 1.2, pch.u = 20, pch.a = 10)
@
\caption{\label{plotexPic6}{Plot for discrete distributions with different symbols for marking jump points }}
\end{center}
\end{figure}
Using log-scale for both axes by \code{log = "xy"} and a reduced number of grid points for plotting
by \code{ngrid = 100} is shown in Figure~\ref{plotexPic7}.
\begin{figure}[p]
\begin{center}
<>=
plot(Chisq(), log = "xy", ngrid = 100)
@
\caption{\label{plotexPic7}{Plot for absolutely continuous distributions using log scales }}
\end{center}
\end{figure}
Changing line width by \code{lwd = 3}, line type by \code{lty = 3} and color by \code{col = "red"},
axis annotation orientation by \code{las = 2} and the number of grid points used for plotting by
\code{ngrid = 200} is shown in Figure~\ref{plotexPic8}.
\begin{figure}[p]
\begin{center}
<>=
plot(Norm(), lwd=3, col = "red", ngrid = 200, lty = 3, las = 2)
@
\caption{\label{plotexPic8}{Plot for absolutely continuous distributions with different line type and reduced number of grid points }}
\end{center}
\end{figure}
You may also use hook functions in distribution plots from version 2.0 on, which is especially
useful for plotting grids in the background as shown in Figure~\ref{plotexPic9}; this plot also has
non-default titles using preset string substitutions (\%N, \%C, \%P, \%A, \%D).
\begin{figure}[p]
\begin{center}
<>=
plot(Norm(), panel.first = grid(),
     main = "my Distribution: %A",
     inner = list(expression(paste(lambda, "-density of %C(%P)")), "CDF",
                  "Pseudo-inverse with param's %N"),
     sub = "this plot was correctly generated on %D",
     cex.inner = 0.9, cex.sub = 0.8)
@
\caption{\label{plotexPic9}{Plot for absolutely continuous distributions using non-standard titles and with a background grid }}
\end{center}
\end{figure}
Special care is taken to correctly depict jumps in the quantile function / gaps in the support; from
version 2.0 on, you have function \code{setgaps()} to automatically detect such gaps, but in case of
very small density values this may give some ``false positives'' as shown for the $\chi^2$
distribution in Figure~\ref{plotexPic10}.
\begin{figure}[p]
\begin{center}
<>=
Ch <- Chisq(); setgaps(Ch, exactq = 3)
plot(Ch, cex = 1.2, pch.u = 20, pch.a = 10, col.points = "green", col.vert = "red")
@
\caption{\label{plotexPic10}{Plot for absolutely continuous distributions with automatic gap detection }}
\end{center}
\end{figure}
From version 2.0 on, you may override the given panel configuration using argument
\code{mfColRow=FALSE} ---see Figure~\ref{plotexPic11}.
\begin{figure}[p]
\begin{center}
<>=
layout(matrix(c(1,3,2,3), nrow=2))
plot(N, mfColRow = FALSE)
@
\caption{\label{plotexPic11}{Plot for absolutely continuous distributions using non-standard panel configuration }}
\end{center}
\end{figure}
Following a suggestion by \href{mailto:unwin@math.uni-augsburg.de}{Anthony Unwin}, from version 2.1
you may also select the panels you would like to plot, using argument \code{to.draw.arg}; the
corresponding panels are named and may either be given by name or by number (the rank in drawing the
default ``complete'' plot); for details see \code{?plot}. As an example of this panel selection, see
Figure~\ref{plotexPic12}.
\begin{figure}[p] \begin{center} <>= layout(matrix(c(rep(1,6),2,2,3,3,4,4,5,5,5,6,6,6), nrow=3, byrow=TRUE)) plot(HN, mfColRow = FALSE, to.draw.arg=c("p","d.c","p.c","q.c", "p.d","q.d")) @ \caption{\label{plotexPic12}{Plot for Lebesgue decomposed distributions with user-chosen selection of plotted panels }} \end{center} \end{figure} \subsubsection[{Plotting for Dataclass objects}]{Plotting for \code{Dataclass} objects} For objects of class \code{Dataclass} ---or of a corresponding subclass--- \code{plot} plots the sample against the run index and in case of \code{ContSimulation} the contaminating variables are highlighted by a different color. Additional arguments controlling the plot as in the default \code{plot} command may be passed, confer \code{help("plot-methods",package="distrSim")}. \subsubsection[{Plotting for Evaluation objects}]{Plotting for \code{Evaluation} objects} For an object of class \code{Evaluation}, \code{plot} yields a boxplot of the results of the evaluation. For an object of class \code{EvaluationList}, \code{plot} regroups the list according to the different columns/coordinates of the result of the evaluation; for each such coordinate, a boxplot is generated, containing possibly several procedures, and, if evaluated at a \code{Contsimulation}, the plots are also grouped into evaluations on ideal and real data. As for the usual \code{boxplot} function you may pass additional ``\code{plot}-type'' arguments to this particular \code{plot} method, confer \code{help("plot-methods",package="distrTEst")}. In particular, the \code{plot}-arguments \code{main} and \code{ylim}, however, may also be transmitted coordinatewise, i.e.; a vector of the same length as the dimension of the result {\tt resDim} (e.g.\ parameter dimension), respectively a {\tt 2 x resDim} matrix, or they may be transmitted globally, using the usual {\tt S} recycling rules. %\newline??????1 \subsubsection[{Plotting for L2paramFamily objects}]{Plotting for \code{L2paramFamily} objects} In package \pkg{distrMod} we have an additional plotting method for class \code{L2paramFamily}; besides the underlying model distribution this also plots the coordinates of the $L_2$-derivative (scores function). From version 2.1 on, this plot is as flexible as the one for \code{Distribution}. You can select the panels to be plotted by argument \code{to.draw.arg}, and may use (almost) all arguments generally available for \code{plot}. In particular, also you may use an argument \code{panel.first=grid()} to produce a grid behind the panel. \subsection[liesInSupport]{\code{liesInSupport}} For all discrete distribution classes, we have methods \code{liesInSupport} to check whether a given vector/ a matrix of points lies in the support of the distribution. From version 2.8.0 on, we have better control on situations where the true support of the distribution is large/infinite to one or both sides---before it used \code{support}, which in this case is truncated to the relevant support points. \subsection[Simulation (in package distrSim)]% {Simulation (in package \pkg{distrSim})} % From version 1.6 on, \code{simulation} is available in package \pkg{distrSim}. For the classes \code{Simulation} and \code{ContSimulation}, we normally will not save the current values of the simulation, as they can easily be reproduced knowing the values of the other slots of this class. % So when declaring a new object of either of the two classes, the slot \code{Data} will be empty (\code{NULL}). 
To fill it with the simulated values, we have to apply the method \code{simulate} to the object.
This has to be redone whenever another slot of the object is changed.
%
To guarantee reproducibility, we use the slot \code{seed}.\\
%
This slot is controlled and set through
\href{mailto:pgilbert@bank-banque-canada.ca}{Paul Gilbert's} \pkg{setRNG} package. By default,
\code{seed} is set to \code{setRNG()}, which returns the current ``state'' of the random number
generator (RNG). So the user does not need to specify a value for \code{seed}, and nevertheless may
reproduce his samples: he simply uses \code{simulate} to fill the \code{Data} slot. If the user
wants to, he may also set the \code{seed} explicitly via the replacement function \code{seed()}, but
then has to take care of the correct format himself, confer the documentation of \code{setRNG}.
One easy way to fill the \code{Data} slot of a simulation \code{X} with ``new'' random numbers is
<>=
X <- Simulation()
seed(X) <- setRNG()
simulate(X)
Data(X)[1:10]
@
%
\subsection[Evaluate (in package distrTEst)]%
{Evaluate (in package \pkg{distrTEst})}\label{evaluate}
%
From version 1.6 on, \code{evaluate} is available in \pkg{distrTEst}. In an object of class
\code{Evaluation} we store relevant information about an evaluation of a statistical procedure
(estimator/test/predictor) on an object of class \code{Dataclass}, including the concrete results of
this evaluation. An object of class \code{Evaluation} is generated by an application of method
\code{evaluate}, which takes as arguments an object of class \code{Dataclass} and a procedure of
type \code{function}. As an example, confer Example~\ref{simex}. For data of class
\code{ContSimulation}, the result takes a slightly different form, combining evaluations on ideal
and real data.
%
\subsection{Is-Relations}
%
By means of \code{setIs}, we have ``told'' {\sf R} that a distribution object \code{obj} of class
\begin{itemize}
\item \code{"Unif"} with $\code{Min} \doteq 0$ and $\code{Max} \doteq 1$ also is a Beta distribution
with parameters \code{shape1 = 1, shape2 = 1}
\item \code{"Geom"} also is a negative Binomial distribution with parameters
\code{size = 1, prob = prob(obj)}
\item \code{"Cauchy"} with $\code{location} \doteq 0$ and $\code{scale}\doteq 1$ also is a T
distribution with parameters \code{df = 1, ncp = 0}
\item \code{"Exp"} also is a Gamma distribution with parameters
\code{shape = 1, scale = 1/rate(obj)} and a Weibull distribution with parameters
\code{shape = 1, scale = 1/rate(obj)}
\item \code{"Chisq"} with non-centrality $\code{ncp}\doteq 0$ also is a Gamma distribution with
parameters \code{shape = df(obj)/2, scale = 2}
\item \code{"DiscreteDistribution"} (from version 1.9 on) with an equally spaced support also is a
\code{"LatticeDistribution"}
\end{itemize}
%
\subsection{Further methods}
%
When iterating/chaining mathematical operations on a univariate distribution, the generation process
of random variables can become clumsy and slow. To cope with this, we introduce a sort of
``Forget-my-past'' method \code{simplifyr} that replaces the chain of mathematical operations in the
\code{r}-method by drawing with replacement from a large sample ($10^{\tt RtoDPQ.e}$) of these.
%
\subsection[Functionals (in package distrEx)]%
{Functionals (in package \pkg{distrEx})}\label{Functionals}
%
\subsubsection{Expectation}\label{Expect}
The most important contribution of package \pkg{distrEx} is a general expectation operator.
In basic statistics courses, the expectation ${\rm E}$ may come as ${\rm E}\,[X]$, ${\rm E}\,[f(X)]$,
${\rm E}\,[X|Y=y]$, or ${\rm E}\,[f(X)|Y=y]$. Our operator (or in S4-language ``generic function'')
\code{E} covers all of these situations (or {\it signatures\/}).
\paragraph{default call}
The most frequent call will be \code{E(X)} where \code{X} is an (almost) arbitrary distribution
object. More precisely, if \code{X} is of a specific distribution class like \code{Pois}, the
expectation is evaluated exactly using analytic terms. Else, if it is of class
\code{DiscreteDistribution}, we use a sum over the support of \code{X}, and if it is of class
\code{AbscontDistribution}, we use numerical integration\footnote{i.e., we first try (really(!):
\code{try}) \code{integrate} and if this fails we use Gau{\ss}-Legendre integration according to
\cite{NumR:92}, see also \code{?distrExIntegrate}}; for {\tt X} of class
\code{UnivarLebDecDistribution}, expectations for the discrete and absolutely continuous part are
evaluated separately and subsequently combined according to their respective weights. If we only
know that {\tt X} is of class \code{UnivariateDistribution}, we use Monte-Carlo integration. This
also is the default method for class \code{MultivariateDistribution}, while for
\code{DiscreteMVDistribution} we again use sums.
For an object \code{Y} of a subclass of class union \code{AffLinDistribution}, we determine the
expectation as \code{Y@a * E(Y@X0) + Y@b} and hence use analytic terms for \code{X0} if available.
\paragraph{with a function as argument} we proceed just as without: if \code{X} is of
class\linebreak[4] \code{DiscreteDistribution}, we use a sum over the support of \code{X}, and if
\code{X} is of class\linebreak[4] \code{AbscontDistribution} we use numerical integration; else we
use Monte-Carlo integration.
\paragraph{in addition: with a condition as argument} we simply use the corresponding \code{d}
respectively \code{r} slots with the additional argument \code{cond}.
\paragraph{exact evaluation} is available for \code{X} of class \code{Arcsine}, \code{Beta} (for
noncentrality $0$), \code{Binom}, \code{Cauchy}, \code{Chisq}, \code{Dirac}, \code{Exp}, \code{Fd},
\code{Gammad}, \code{Geom},
%\code{Gumbel}, -> moved to RobExtremes
\code{Hyper}, \code{Logis}, \code{Lnorm}, \code{Nbinom}, \code{Norm},
%\code{Pareto}, -> moved to RobExtremes
\code{Pois}, \code{Td}, \code{Unif}.
% \code{Weibull} -> moved to RobExtremes \paragraph{examples} $ \mbox{ }$\newline %(ll)expectationPrep, eval = TRUE, echo=FALSE(ggeq) %SweaveListingOptions("inSweave"=TRUE) %(at) <>= D4 <- LMCondDistribution(theta = 1) D4 # corresponds to Norm(cond, 1) N <- Norm(mean = 2) E(D4, cond = 1) E(D4, cond = 1, useApply = FALSE) E(as(D4, "UnivariateCondDistribution"), cond = 1) E(D4, function(x){x^2}, cond = 2) E(D4, function(x){x^2}, cond = 2, useApply = FALSE) E(N, function(x){x^2}) E(as(N, "UnivariateDistribution"), function(x){x^2}, useApply = FALSE) # crude Monte-Carlo E(D4, function(x, cond){cond*x^2}, cond = 2, withCond = TRUE) E(D4, function(x, cond){cond*x^2}, cond = 2, withCond = TRUE, useApply = FALSE) E(N, function(x){2*x^2}) E(as(N, "UnivariateDistribution"), function(x){2*x^2}, useApply = FALSE) # crude Monte-Carlo Y <- 5 * Binom(4, .25) - 3 Y E(Y) @ % \paragraph{Controlling integration range:} From version 2.1 on, \code{E} gains arguments \code{low} and \code{upp} to restrict evaluation of the integrand to a given integration domain; these arguments can also be passed through by other functionals based on expectation, like \code{var}, \code{sd}, \code{skewness}, and \code{kurtosis}. <>= E(Cauchy(), low=3, upp=5) var(Cauchy(), low=3, upp=5) @ \paragraph{Controlling accuracy:} From version 2.1 on, our expectation methods gain explicit arguments to set accuracy locally; i.e.; the MC-methods have an argument \code{Nsim} defaulting to global option \code{MCIterations}, while the methods for class \code{AbscontDistribution} using numerical integration have an argument \code{rel.tol} defaulting to global option \code{ErelativeTolerance}. To obtain a sensible integration range automatically, these methods use both quantile and scale based methods; more precisely you may pass on arguments \code{lowerTruncQuantile} and \code{upperTruncQuantile}, defaulting to global options \code{ElowerTruncQuantile}, and \code{EupperTruncQuantile}, respectively, by means of these we determine lower and upper quantiles $l_0$, $u_0$. In addition, we determine scale based bounds as ${\rm median} \pm s_f \,{\rm IQR}$ where $s_f$ is a scaling factor to be passed on as argument \code{IQR.fac} which defaults to global option \code{IQR.fac}. % %% no longer needed in knitr: %% %%(ll)expectationPrep, eval = TRUE, echo = FALSE(ggeg) %%SweaveListingOptions("inSweave"=TRUE) %%(at) % <>= E(N, function(x)x^2) E(N, function(x)x^2, lowerTruncQuantile = 1e-5) var(Cauchy(), low =3, upperTruncQuantile = 1e-5, IQR.fac = 10) var(Cauchy(), low =3, upperTruncQuantile = 1e-10, IQR.fac = 20) @ % \subsubsection{Variance} The next-common functional is the variance. In order to keep a unified notation we will use the same name as for the empirical variance, i.e., \code{var}. \paragraph{masking \pkg{stats}-method \code{var}} To cope with the different argument structure of the empirical variance, i.e. \code{var(x, y = NULL, na.rm = FALSE, use)} and our functional variance, i.e., \code$var(x, fun = function(t) {t}, cond, withCond = FALSE, useApply = TRUE, ...)$ we have to mask the original \pkg{stats}-method: <>= var <- function(x , ...) {dots <- list(...) if(hasArg(y)) y <- dots$"y" na.rm <- ifelse(hasArg(na.rm), dots$"na.rm", FALSE) if(!hasArg(use)) use <- ifelse (na.rm, "complete.obs","all.obs") else use <- dots$"use" if(hasArg(y)) stats::var(x = x, y = y, na.rm = na.rm, use) else stats::var(x = x, y = NULL, na.rm = na.rm, use) } @ before registering \code{var} as generic function. 
Doing so, if the \code{x} (or the first) argument of \code{var} is not of class
\code{UnivariateDistribution}, \code{var} behaves identically to the function from the \pkg{stats}
package.
\paragraph{default method} if \code{x} is of class \code{UnivariateDistribution}, \code{var} just
returns the variance of distribution \code{X} --- or of \code{fun(X)} if a function is passed as
argument \code{fun}, or, if a condition argument \code{cond} (for $Y=y$) is given,
${\rm Var}\,[X|Y=y]$ respectively ${\rm Var}\,[f(X)|Y=y]$ --- just as for \code{E}. The same goes
for the corresponding arguments controlling the accuracy of \code{E} locally from version 2.1 on
(see paragraph ``Controlling accuracy''): these may simply be passed through in a call to
\code{var}.
For an object \code{Y} of a subclass of class union \code{AffLinDistribution}, we determine the
variance as \code{Y@a\textasciicircum2 * var(Y@X0)} and hence use analytic terms for \code{X0} if
available.
\paragraph{exact evaluation} is provided for specific distributions if no function and no condition
argument is given: this is available for \code{X} of class \code{Arcsine}, \code{Beta} (for
noncentrality $0$), \code{Binom}, \code{Cauchy}, \code{Chisq}, \code{Dirac}, \code{Exp}, \code{Fd},
\code{Gammad}, \code{Geom},
%\code{Gumbel}, -> moved to RobExtremes
\code{Hyper}, \code{Logis}, \code{Lnorm}, \code{Nbinom}, \code{Norm},
% \code{Pareto}, -> moved to RobExtremes
\code{Pois}, \code{Unif}, \code{Td}.
%, \code{Weibull}. -> moved to RobExtremes
\subsubsection{Further functionals}
By the same techniques we provide the following functionals for univariate distributions:
\begin{itemize}
\item standard deviation: \code{sd}
\item skewness: \code{skewness} (code contributed by G. Jay Kerns, {\small \tt gkerns@ysu.edu})
\item kurtosis: \code{kurtosis} (code contributed by G. Jay Kerns, {\small \tt gkerns@ysu.edu})
\item median: \code{median} (not for function arguments)
\item median of absolute deviations: \code{mad} (not for function/condition arguments)
\item interquartile range: \code{IQR} (not for function arguments)
\end{itemize}
For details, see \code{?skewness}.
%
\subsection[Truncated moments (in package distrEx)]{Truncated moments (in package
\pkg{distrEx})}\label{m1df}
%
For Robust Statistics, the first two truncated moments are very useful. These are realized as
generic functions \code{m1df} and \code{m2df}: they use the expectation operator for general
univariate distributions, but are overloaded for most specific distributions:
\begin{itemize}
\item \code{Binom}
\item \code{Pois}
\item \code{Norm}
\item \code{Exp}
\item \code{Chisq}
\end{itemize}
%
\subsection[Distances (in package distrEx)]{Distances (in package
\pkg{distrEx})}\label{Distances}
%
For several purposes like goodness-of-fit tests or minimum-distance estimators, distances between
distributions are useful. This applies in particular to Robust Statistics. In package \pkg{distrEx},
we provide the following distances:
\begin{itemize}
\item Kolmogorov distance (\code{KolmogorovDist})
\item total variation distance (\code{TotalVarDist})
\item Hellinger distance (\code{HellingerDist})
\item Cram\'er von Mises distance (\code{CvMDist}) with an additional argument for the weighting
measure $\mu$ (defaulting to the second operand $Q$):
$$ d_\mu(P,Q)^2=\int \Big(Q((-\infty;t])-P((-\infty;t])\Big)^2\,\mu(dt) $$
\item convex-contamination ``distance'' (asymmetric!)
(\code{ContaminationSize}) defined as
$$ d(Q,P):=\inf\{r>0\,|\, \exists \;\mbox{probability } H:\quad Q=(1-r)P+rH \} $$
\item from version 2.1 on: an asymmetric version of the total variation distance
(\code{AsymTotalVarDist}): for a given ratio $\rho\ge 0$ of the negative to the positive part of the
deviation we set
$$ d_{v;\rho}(Q,P):=\int (dQ - c\,dP)_+ $$
where $c\in\R $ is such that
$$\rho \int (dQ - c\,dP)_+=\int (dQ - c\,dP)_-$$
\item minimal total variation distance (\code{OAsymTotalVarDist}):
$$ d_{v;{\rm opt}}(Q,P):= \min_c \int (dQ - c\,dP)_+ + (dQ - c\,dP)_- $$
\end{itemize}
Methods using numerical integration use a technique similar to the one for the expectation mentioned
in subsection~\ref{Expect}, combining scale and quantile based methods to obtain a sensible
integration range automatically.
\subsection[Functions for demos (in package distrEx)]{Functions for demos (in package
\pkg{distrEx})}\label{Demos}
%
To illustrate the possibilities of packages \pkg{distr} and \pkg{distrEx}, we include two major
demos in \pkg{distrEx}, each with extra code to it --- one for the CLT and one for the LLN. From
version 2.0 on, we have started a new package \pkg{distrTeach}, which uses the capabilities of
packages \pkg{distr} and \pkg{distrEx} for illustrating topics of Stochastics and Statistics as
taught in secondary school. So far we have moved the illustrations for the CLT and the LLN just
mentioned to it.
\subsubsection{CLT for arbitrary summand distribution}
By means of our convolution algorithm as well as the operators \code{E} and \code{sd}, an
illustration of the CLT is readily written: function \code{illustrateCLT}, respectively demo
\code{illustCLT}. For plotting, we have particular methods for discrete and absolutely continuous
distributions. The user may specify a given summand distribution, an upper limit for the consecutive
sums to be considered, and a pause between the corresponding plots in seconds.
From version 1.9 on, we also include a \code{TclTk}-based version of this demo, where the user may
enter the distribution argument (i.e.; the summands' distribution) into a text line and control the
sample size by a slider in some widget: \code{illustCLT\_tcl}. From version 2.0 on, this
functionality has moved to package \pkg{distrTeach}.
\subsubsection{LLN for arbitrary summand distribution}
From version 1.9 on, similarly, we provide an illustration for the LLN: function
\code{illustrateLLN}, respectively demo \code{illustLLN}. The user may specify a vector of sample
sizes to be considered, the number of replicates to be drawn, and a pause between the corresponding
plots in seconds; also, optionally, the limiting expectation (in case of class \code{Cauchy}: the
non-limiting median) is drawn as a line, and Chebyshev/CLT-based (pointwise) confidence bands and
their respective empirical coverages are displayed. From version 2.0 on, this functionality has
moved to package \pkg{distrTeach}.
\subsubsection{Deconvolution example}
To illustrate conditional distributions and their implementation in \linebreak[4]\pkg{distrEx}, we
consider the following situation: a signal $X\sim P^X$ is disturbed by noise
$\varepsilon\sim P^\varepsilon$, independent of $X$; in fact we observe $Y=X+\varepsilon$ and want
to reconstruct $X$ by means of $Y$.
By means of the generating function \code{PrognCondDistribution} of package \pkg{distrEx}, for
absolutely continuous $P^X, P^\varepsilon$, we may determine the factorized conditional distribution
$P^{X|Y=y}$, and based on this either its (posterior) mode or its (posterior) expectation; also see
\code{demo(Prognose, package="distrEx")}.
% -----------------------------------------------------------------------------
\section[Package distrMod]{Package {\tt distrMod}}\label{distrMod}
% -----------------------------------------------------------------------------
The package \pkg{distrMod} aims at an object-oriented ({\tt S4}-style) implementation of probability
models and introduces several new S4-classes for this purpose. Moreover, it includes functions to
compute minimum criterion estimators -- in particular, minimum distance and maximum likelihood
(i.e., minimum negative log-likelihood) estimators.
% -----------------------------------------------------------------------------
\subsection{Symmetry Classes}
% -----------------------------------------------------------------------------
As symmetry is a property which usually cannot be proven via numerical computations, we introduce
the S4-class \code{Symmetry} and corresponding subclasses which may serve as slots indicating that
there exists a certain symmetry. So far, we have subclasses for the symmetry of distributions as
well as for the symmetry of functions; confer Figure~\ref{figSym}.
\ifpdf
\begin{figure}[!ht]
\vspace{2ex}
\begin{center}
\includegraphics[width=13cm]{Symmetry.pdf}%
\caption{\label{figSym}{\footnotesize Inheritance relations and slots of the corresponding
\mbox{(sub-)}classes for \code{Symmetry} where we do not repeat inherited slots }}
\end{center}
\vspace{-4ex}
\end{figure}
\else
\begin{figure}[htb]
\begin{center}
\includegraphics[width=7.5cm]{Symmetry.eps}%
\caption{\label{figSym}{\footnotesize Inheritance relations and slots of the corresponding
\mbox{(sub-)}classes for \code{Symmetry} where we do not repeat inherited slots }}
\end{center}
\vspace{-1ex}
\end{figure}
\fi
% -----------------------------------------------------------------------------
\newpage
\subsection{Model Classes}
% -----------------------------------------------------------------------------
Based on class \code{Distribution} and its subclasses, we define classes for families of probability
measures. So far, we have specialised this to parametric families of probability measures; confer
Figure~\ref{figPF}. But it would also be possible to derive subclasses for other (e.g.,
semiparametric) families of probability measures. In case of $L_2$-differentiable (i.e., smoothly
parameterized) parametric families, we introduce several additional slots, in particular the slot
\code{L2deriv} which is of class \code{EuclRandVarList}. Hence, package \pkg{distrMod} depends on
package \pkg{RandVar}~\cite{MK:05}.
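As a small illustrative sketch of such a model object (the concrete family and parameter values are
arbitrary, and we assume that package \pkg{distrMod} is attached), one may construct a normal
location and scale family and inspect its central distribution and its parameter:
<>=
require(distrMod)
## an L2-differentiable parametric family; its scores are stored in slot L2deriv
NF <- NormLocationScaleFamily(mean = 0, sd = 1)
distribution(NF)
param(NF)
@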
\ifpdf \begin{figure}[!ht] \vspace{2ex} \begin{center} \includegraphics[width=10cm]{ProbFamily.pdf}% \caption{\label{figPF}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{ProbFamily} where we do not repeat inherited slots }} \end{center} \vspace{-4ex} \end{figure} \else \begin{figure}[htb] \begin{center} \includegraphics[width=7.5cm]{ProbFamily.eps}% \caption{\label{figPF}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{ProbFamily} where we do not repeat inherited slots }} \end{center} \vspace{-1ex} \end{figure} \fi % Note that for general central distribution in the one-dimensional location and scale model, we need to determine a consistency factor for the {\tt MAD} (when used as scale estimator). From version 2.1 on this consistency factor is determined automatically. % ----------------------------------------------------------------------------- \subsection[Parameter in a parametric family]% {Parameter in a parametric family: class \code{ParamFamParameter}} % ----------------------------------------------------------------------------- In many applications, it is not the whole parameter of a parametric family which is of interest, but rather parts of it, while the rest of it either is known and fixed or has to be estimated as a nuisance parameter; in other situations, we are interested in a (smooth) transformation of the parameter. This all is realized in a class design for the parameter of a parametric family ---class \code{ParamFamParameter}, the formal class of a slot of class \code{ParamFamily}. \ifpdf \begin{figure}[!ht] \vspace{2ex} \begin{center} \includegraphics[width=4cm]{ParamFamParameter.pdf}% \caption{\label{figPFParam}{\footnotesize Inheritance relations and slots of \code{ParamFamParameter} where we do not repeat inherited slots }} \end{center} \vspace{-4ex} \end{figure} \else \begin{figure}[htb] \begin{center} \includegraphics[width=7.5cm]{ParamFamParameter.eps}% \caption{\label{figPFParam}{\footnotesize Inheritance relations and slots of \code{ParamFamParameter} where we do not repeat inherited slots }} \end{center} \vspace{-1ex} \end{figure} \fi It has slots \code{name} (the name of the parameter), \code{main} (the interesting aspect of the parameter), \code{nuisance} an unknown part of the parameter of secondary interest, but which has to be estimated, for instance for confidence intervals, and \code{fixed} a known and fixed part of the parameter. Besides these it also has a slot \code{trafo} which also sort of arises in class \code{Estimate}. \code{trafo} realizes partial influence curves; i.e.; we are only interested is some possibly lower dimensional smooth (not necessarily linear or even coordinate-wise) aspect/transformation $\tau$ of the parameter $\theta$. To be coherent with the corresponding {\em nuisance} implementation, we make the following convention: The full parameter $\theta$ is split up coordinate-wise in a main parameter $\theta'$ and a nuisance parameter $\theta''$ (which is unknown, too, hence has to be estimated, but only is of secondary interest) and a fixed, known part $\theta'''$. Without loss of generality, we restrict ourselves to the case that transformation $\tau$ only acts on the main parameter $\theta'$ --- if we want to transform the whole parameter, we only have to assume both nuisance parameter $\theta''$ and fixed known part of the parameter $\theta'''$ have length 0. 
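As a minimal sketch of such a parameter object (the name, the values, and the identity-matrix
\code{trafo} are arbitrary illustration choices, and we assume that package \pkg{distrMod} is
attached), one could write:
<>=
require(distrMod)
## two-dimensional main part, no nuisance or fixed part;
## trafo is simply the identity matrix here
P <- ParamFamParameter(name = "location and scale",
                       main = c(loc = 0, scale = 1),
                       trafo = diag(2))
P
@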
\paragraph{To the implementation:}
Slot \code{trafo} can either contain a (constant) matrix $D_\theta$ or a function
$$\tau\colon \Theta' \to \tilde \Theta,\qquad \theta \mapsto \tau(\theta)$$%
mapping the main parameter $\theta'$ to some range $\tilde \Theta$.
If the {\em slot value} \code{trafo} is a function, besides $\tau(\theta)$, it will also return the
corresponding derivative matrix $\frac{\partial}{\partial \theta}\tau(\theta)$. More specifically,
the return value of this function, evaluated at \code{theta}, is a list with entries \code{fval},
the function value $\tau(\theta)$, and \code{mat}, the derivative matrix. In case \code{trafo} is a
matrix $D$, we interpret it as such a derivative matrix
$\frac{\partial}{\partial \theta}\tau(\theta)$, and, correspondingly, $\tau(\theta)$ is the linear
mapping $\tau(\theta)=D\,\theta$.\par
According to the signature, the {\em method} \code{trafo} will return different types of return
values. For signatures \code{Estimate,missing}, \code{Estimate,ParamFamParameter}, and
\code{ParamFamily,ParamFamParameter}, it will return a list with entries \code{fct}, the function
$\tau$, and \code{mat}, the matrix $\frac{\partial}{\partial \theta}\tau(\theta)$. Function $\tau$
will then return the list \code{list(fval, mat)} mentioned above. For signatures
\code{ParamFamily,missing} and \code{ParamFamParameter,missing}, it will just return the
corresponding matrix.
From version 2.1 on, there are helper functions \code{trafo.fct()} (see \code{?trafo.fct}) and
\code{trafoEst}. While \code{trafo.fct()} allows access to the ``function'' aspect of the
transformation, returning the corresponding function, \code{trafoEst} transforms an existing
estimator of class \code{Estimate} consistently (i.e.; with corresponding
\code{untransformed.estimate} and \code{untransformed.asvar} information and transformed
\code{asvar}) by a ``trafo'' function; see \code{?trafoEst}.
% -----------------------------------------------------------------------------
%\newpage
\subsection{Risk Classes}
% -----------------------------------------------------------------------------
The risk classes are up to now (i.e., version 2.0) not used inside of the distr-family. They are,
however, used in the RobASt-family~\cite{MK:05}. We distinguish between various finite-sample and
asymptotic risks; confer Figure~\ref{figRT}. The bias and norm classes given in Figure~\ref{figBT}
and Figure~\ref{figNT}, respectively, occur as slots of the risk classes.
\ifpdf \begin{figure}[!ht] \vspace{2ex} \begin{center} \includegraphics[width=7cm]{BiasType.pdf}% \caption{\label{figBT}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{BiasType} where we do not repeat inherited slots }} \end{center} \vspace{-4ex} \end{figure} \else \begin{figure}[htb] \begin{center} \includegraphics[width=6cm]{BiasType.eps}% \caption{\label{figBT}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{BiasType} where we do not repeat inherited slots }} \end{center} \vspace{-1ex} \end{figure} \fi \ifpdf \begin{figure}[!ht] \vspace{2ex} \begin{center} \includegraphics[width=4cm]{NormType.pdf}% \caption{\label{figNT}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{NormType} where we do not repeat inherited slots }} \end{center} \vspace{-4ex} \end{figure} \else \begin{figure}[htb] \begin{center} \includegraphics[width=7.5cm]{NormType.eps}% \caption{\label{figNT}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{NormType} where we do not repeat inherited slots }} \end{center} \vspace{-1ex} \end{figure} \fi \ifpdf \begin{figure}[!ht] \vspace{2ex} \begin{center} \includegraphics[width=13cm]{RiskType.pdf}% \caption{\label{figRT}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{RiskType} where we do not repeat inherited slots }} \end{center} \vspace{-4ex} \end{figure} \else \begin{figure}[htb] \begin{center} \includegraphics[width=7.5cm]{RiskType.eps}% \caption{\label{figRT}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{RiskType} where we do not repeat inherited slots }} \end{center} \vspace{-1ex} \end{figure} \fi % ----------------------------------------------------------------------------- \clearpage \subsection{Minimum Criterion Estimation} % ----------------------------------------------------------------------------- The S4-classes and methods defined inside of our distr-family enable us to define general functions for the computation of minimum criterion estimators -- in particular, minimum distance and maximum likelihood (i.e., minimum negative log-likelihood) estimators. The main function for this purpose is \code{MCEstimator}. As an example we can use the negative log-likelihood as criterion; i.e., compute the maximum likelihood estimator. <>= library(distrMod) x <- rgamma(50, scale = 0.5, shape = 3) G <- GammaFamily(scale = 1, shape = 2) negLoglikelihood <- function(x, Distribution){ res <- -sum(log(Distribution@d(x))) names(res) <- "Negative Log-Likelihood" return(res) } MCEstimator(x = x, ParamFamily = G, criterion = negLoglikelihood) @ The user can specialize the behavior of \code{MCEstimator} on two layers: instance-individual or class-individual. Using the first layer, we may specify model-individual starting values / search intervals by slot \code{startPar} of class \code{ParamFamily}, pass on special control parameters to functions \code{optim} / \code{optimize} by a \code{...} argument in function \code{MCEstimator}, and we may enforce valid parameter values by specifying function slot \code{makeOKPar} of class \code{ParamFamily}; also one can specify a penalty value penalizing invalid parameter values. 
E.g.; in case of the censored Poisson distribution family in demo {\tt censoredPois} of this
package, these functions are defined as
<>=
## search interval for reasonable parameters
startPar <- function(x,...) c(.Machine$double.eps,max(x))
## what to do in case of leaving the parameter domain
makeOKPar <- function(param)
               {if(param<=0) return(.Machine$double.eps)
                return(param)}
@
In some situations, one would rather like to define rules for groups of models or to be even more
flexible; this can be achieved using the class-individual layer: we may use method dispatch to find
the ``right'' function to determine the MC estimator; to this end, subclasses to class
\code{L2ParamFamily} have to be defined, which has already been done, e.g.\ in case of class
\code{PoisFamily}. In general these subclasses will not have any new slots. E.g.; the code to define
class \code{PoisFamily} simply is
<>=
setClass("PoisFamily", contains = "L2ParamFamily")
@
For group models, like the location scale model, there may be additional slots and intermediate
classes. E.g.,
<>=
setClass("NormLocationFamily", contains = "L2LocationFamily")
@
Then, for these subclasses, particular methods may be defined; so far, in package \pkg{distrMod} we
have particular \code{validParameter} methods for classes \code{ParamFamily}, \code{L2ScaleFamily},
\code{L2LocationFamily}, and \code{L2LocationScaleFamily}. E.g.; the code to signature
\code{L2ScaleFamily} simply is
<>=
setMethod("validParameter", signature(object = "L2ScaleFamily"),
          function(object, param, tol=.Machine$double.eps){
            if(is(param,"ParamFamParameter")) param <- main(param)
            if(!all(is.finite(param))) return(FALSE)
            if(length(param)!=1) return(FALSE)
            return(param > tol)})
@
To move the whole model from one parameter value to the other, so far we have \code{modifyModel}
methods for classes \code{L2ParamFamily}, \code{L2LocationFamily}, \code{L2ScaleFamily},
\code{L2LocationScaleFamily}, \code{GammaFamily}, and \code{ExpScaleFamily}, where the second
argument to dispatch on so far has to be of class \code{ParamFamParameter}. E.g.; the code to
signature \code{model="GammaFamily"} is
<>=
setMethod("modifyModel", signature(model = "GammaFamily",
           param = "ParamFamParameter"),
          function(model, param, ...){
            M <- modifyModel(as(model, "L2ParamFamily"), param = param,
                             .withCall = FALSE)
            M@L2derivSymm <- FunSymmList(OddSymmetric(SymmCenter =
                                           prod(main(param))),
                                         NonSymmetric())
            class(M) <- class(model)
            return(M)
          })
@
We also allow for particular methods within function \code{MCEstimator}, as therein we call method
\code{mceCalc}; so far there only is a method for
\code{signature(x="numeric", PFam="ParamFamily")}. Similarly, and more importantly, the same
technique is applied for the wrapper function \code{MLEstimator}.
In case of the maximum likelihood estimator as well as in case of minimum distance (MD) estimation,
there are the functions \code{MLEstimator} and \code{MDEstimator} which provide user-friendly
interfaces to \code{MCEstimator}. Hence, the maximum likelihood estimator and, for instance, the
Kolmogorov MD estimator can more easily be computed as follows.
<>=
MLEstimator(x = x, ParamFamily = G)
MDEstimator(x = x, ParamFamily = G, distance = KolmogorovDist)
@
Within \code{MLEstimator}, we call method \code{mleCalc}, which then dispatches according to its
arguments \code{x} and \code{PFam} as in case of method \code{mceCalc}.
So far \code{x} must inherit from class \code{numeric}, and there are particular methods for argument \code{PFam} of classes \code{ParamFamily}, \code{BinomFamily}, \code{PoisFamily}, \code{NormLocationFamily}, \code{NormScaleFamily}, and \code{NormLocationScaleFamily}. More specifically, \code{mleCalc} must have an extra \code{...} argument to cope with different callings from \code{MLEstimator}; additional arguments are possible of course. The return value must be a list with prescribed structure; to this end function \code{meRes()} is helpful which produces this structure. E.g. the \code{mleCalc}-method for \code{signature(x="numeric", PFam="NormScaleFamily")} is <>= setMethod("mleCalc", signature(x = "numeric", PFam = "NormScaleFamily"), function(x, PFam, ...){ n <- length(x) theta <- sqrt((n-1)/n)*sd(x); mn <- mean(distribution(PFam)) ll <- -sum(dnorm(x, mean=mn, sd = theta, log=TRUE)) names(ll) <- "neg.Loglikelihood" crit.fct <- function(sd) -sum(dnorm(x, mean=mn, sd = sd, log=TRUE)) param <- ParamFamParameter(name = "scale parameter", main = c("sd"=theta)) if(!hasArg(Infos)) Infos <- NULL return(meRes(x, theta, ll, param, crit.fct, Infos = Infos)) }) @ We also provide a coercion to class \code{mle} from package \pkg{stats4}, hence making profiling by the \code{profile}-method therein possible. In order to be able to do so, we need to fill a functional slot \code{criterion.fct} of class \code{MCEstimate}. In many examples this is straightforward, but in higher dimensions, helper function \code{get.criterion.fct} can be useful, e.g. it handles the general case for \code{signature(PFam="ParamFamily")}. The results of our computations in functions\code{MCEstimator}, \code{MDEstimator}, and \code{MLEstimator} are objects of S4-class \code{MCEstimate} which inherits from S4-class \code{Estimate}. The definitions are given in Figure~\ref{figEst}. \ifpdf \begin{figure}[!ht] \vspace{2ex} \begin{center} \includegraphics[width=4cm]{Estimate.pdf}% \caption{\label{figEst}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{Estimate} where we do not repeat inherited slots }} \end{center} \vspace{-4ex} \end{figure} \else \begin{figure}[htb] \begin{center} \includegraphics[width=4cm]{Estimate.eps}% \caption{\label{figEst}{\footnotesize Inheritance relations and slots of the corresponding \mbox{(sub-)}classes for \code{Estimate} where we do not repeat inherited slots }} \end{center} \vspace{-1ex} \end{figure} \fi For class \code{MCEstimate}, we have a method \code{confint}, which produces confidence intervals (of class \code{Confint}). For class \code{Confint} as well as for class \code{Estimate} we have particular \code{show} and \code{print} methods where you may scale the output by setting global options with \code{distrModOptions}, see also subsection~\ref{distrModO}. 
As example consider the following: <>= require(distrMod) ## some transformation mtrafo <- function(x){ nms0 <- c("scale","shape") nms <- c("shape","rate") fval0 <- c(x[2], 1/x[1]) names(fval0) <- nms mat0 <- matrix( c(0, -1/x[1]^2, 1, 0), nrow = 2, ncol = 2, dimnames = list(nms,nms0)) list(fval = fval0, mat = mat0)} set.seed(124) x <- rgamma(50, scale = 0.5, shape = 3) ## parametric family of probability measures G <- GammaFamily(scale = 1, shape = 2, trafo = mtrafo) ## MLE res <- MLEstimator(x = x, ParamFamily = G) print(res, digits = 4, show.details="maximal") print(res, digits = 4, show.details="medium") print(res, digits = 4, show.details="minimal") ci <- confint(res) print(ci, digits = 4, show.details="maximal") print(ci, digits = 4, show.details="medium") print(ci, digits = 4, show.details="minimal") ## some profiling par(mfrow=c(2,1)) plot(profile(res)) @ % ----------------------------------------------------------------------------- \section{Options}\label{options} % ----------------------------------------------------------------------------- \subsection[Options for distr]{Options for \pkg{distr}} % ----------------------------------------------------------------------------- Analogously to the \code{options} command in {\sf R} you may specify a number of global ``constants'' to be used within the package. These include \begin{itemize} \item \code{DefaultNrFFTGridPointsExponent}: the binary logarithm of the number of grid-points used in FFT ---default $12$ \item \code{DefaultNrGridPoints}: number of grid-points used for a continuous variable ---default $4096$ \item \code{DistrResolution}: the finest step length that is permitted for a grid for a discrete variable ---default $1{\rm e-}\!06$ \item \code{RtoDPQ.e}: For simulational determination of \code{d}, \code{p} and \code{q}, $10^{\rm RtoDPQ.e}$ random variables are simulated ---default $5$ \item \code{TruncQuantile}: to work with compact support, random variables are truncated to their lower/upper \code{TruncQuantile}-quantile ---default $1{\rm e-}\!05$.\\ From version 1.9 on, for $\varepsilon={\tt \code{TruncQuantile}}$, we use calls of form \code{q(X)(eps, lower.tail = FALSE)} instead of \code{q(X)(1-eps)} to gain higher precision. \item \code{warningSim}: controls whether a warning issued at printing/showing a \code{Distribution} object the slots of which have been filled starting with simulations ---default \code{TRUE} \item \code{warningArith}: controls whether a warning issued at printing/showing a \code{Distribution} object produced by arithmetics operating on distributions ---default \code{TRUE} \item \code{withgaps}: controls whether in the return value of arithmetic operations the slot \code{gaps} of an the \code{AbscontDistribution} part is filled automatically based on empirical evaluations via \code{setgaps} ---default \code{TRUE} \item \code{simplifyD}: controls whether in the return value of arithmetic operations there is a call to \code{simplifyD} or not ---default \code{TRUE} \item \code{DistrCollapse}: logical; shall support points with distance smaller than \code{DistrResolution} be collapsed; default value: \code{TRUE} \item \code{withSweave}: logical; is code run in Sweave (then no new graphic devices are opened); default value: \code{FALSE} \item \code{DistrCollapse.Unique.Warn}: logical; shall a warning be issued upon collapsing? 
default value: \code{TRUE}
\item \code{use.generalized.inverse.by.default}, which is a logical variable giving the default
value for argument \code{generalized} of our method \code{solve} in package \pkg{distrMod}. This
argument decides whether our method \code{solve} is to use generalized inverses if the original
\code{solve}-method from package \pkg{base} fails; if the option is set to \code{FALSE}, then, in
case of failure and unless argument \code{generalized} is explicitly set to \code{TRUE},
\code{solve} will throw an error, just as the \pkg{base} method does. The default value of this
option is \code{TRUE}.
%
\end{itemize}
All current options may be inspected by \code{distroptions()} and modified by \linebreak[4]
\code{distroptions("<option>" = <value>)}.\\
As with \code{options}, \code{distroptions("<option>")} returns a list of length $1$ with the value
of the corresponding option, so here, just as with \code{getOption},
\code{getdistrOption("<option>")} will be preferable, as it only returns the value.
\subsection[Options for distrEx]{Options for \pkg{distrEx}}\label{distrExoptions}
Up to version {\tt 0.4-4} we used the function \code{distrExOptions(arg = "missing", value = -1)} to
manage some global options for \pkg{distrEx}, i.e.:\\
\code{distrExOptions()} returns a list of these options, \code{distrExOptions(arg=x)} returns option
\code{x}, and \code{distrExOptions(arg=x,value=y)} sets the value of option \code{x} to {\tt y}.\\
From version {\tt 1.9} on, we use a mechanism analogous to the
\code{distroptions}/\code{getdistrOption} commands: you may specify certain global output options to
be used within the package with \code{distrExoptions}/\code{getdistrExOption}. These include
\begin{itemize}
\item \code{MCIterations}: number of Monte-Carlo iterations used for crude Monte-Carlo integration;
defaults to \code{1e5}.
\item \code{GLIntegrateTruncQuantile}: If \code{integrate} fails and there are infinite integration
limits, the function \code{GLIntegrate} is called inside of \code{distrExIntegrate} with the
corresponding quantiles \code{GLIntegrateTruncQuantile} resp.\ \code{1-GLIntegrateTruncQuantile} as
finite integration limits; defaults to \code{10*.Machine$double.eps}.
\item \code{GLIntegrateOrder}: The order used for the Gau{\ss{}}-Legendre integration inside of
\code{distrExIntegrate}; defaults to 500.
\item \code{ElowerTruncQuantile}: The lower limit of integration used inside of \code{E} which
corresponds to the \code{ElowerTruncQuantile}-quantile; defaults to \code{1e-7}.
\item \code{EupperTruncQuantile}: The upper limit of integration used inside of \code{E} which
corresponds to the \code{(1-EupperTruncQuantile)}-quantile; defaults to \code{1e-7}.
\item \code{ErelativeTolerance}: The relative tolerance used inside of \code{E} when
calling\linebreak[4] \code{distrExIntegrate}; defaults to \code{.Machine$double.eps^0.25}.
\item \code{m1dfLowerTruncQuantile}: The lower limit of integration used inside of \code{m1df} which
corresponds to the \code{m1dfLowerTruncQuantile}-quantile; defaults to \code{0}.
\item \code{m1dfRelativeTolerance}: The relative tolerance used inside of \code{m1df} when calling
\code{distrExIntegrate}; defaults to \code{.Machine$double.eps^0.25}.
\item \code{m2dfLowerTruncQuantile}: The lower limit of integration used inside of \code{m2df} which
corresponds to the \code{m2dfLowerTruncQuantile}-quantile; defaults to \code{0}.
\item \code{m2dfRelativeTolerance}: the relative tolerance used inside of \code{m2df}
      when calling \code{distrExIntegrate}; defaults to
      \code{.Machine$double.eps^0.25}.
\item \code{nDiscretize}: number of support values used for the discretization of
      objects of class \code{"AbscontDistribution"}; defaults to 100.
\item \code{hSmooth}: smoothing parameter used to smooth objects of class
      \code{"DiscreteDistribution"}. This is done via convolution with the normal
      distribution \code{Norm(mean = 0, sd = hSmooth)}; defaults to \code{0.05}.
\item \code{IQR.fac}: for determining sensible integration ranges, we use both
      quantile and scale based methods; for the scale based method we use the median
      of the distribution $\pm$ \code{IQR.fac} $\times$ the IQR; defaults to 15.
\end{itemize}
%We are planning to switch to \code{distroptions}/\code{getdistrOption}-like
% commands in the next release of this package.
%
\subsection[Options for distrMod]{Options for \pkg{distrMod}} \label{distrModO}
Just as with the \code{distroptions}/\code{getdistrOption} commands, you may specify
certain global output options to be used within the package with
\code{distrModoptions}/\linebreak[4] \code{getdistrModOption}. These include
\begin{itemize}
%
\item \code{show.details}, which controls the level of detail used by method
      \code{show} for objects of classes of the \pkg{distrXXX} family of packages.
      Possible values are
      \begin{itemize}
      \item \code{"maximal"}: {all information is shown}
      \item \code{"minimal"}: {only the most important information is shown}
      \item \code{"medium"}: {somewhere in the middle; see the actual
            \code{show}-methods for details.}
      \end{itemize}
      The default value is \code{"maximal"}.
\end{itemize}
%
\subsection[Options for distrSim]{Options for \pkg{distrSim}}
Just as with the \code{distroptions}/\code{getdistrOption} commands, you may specify
certain global output options to be used within the package with
\code{distrSimoptions}/\linebreak[4] \code{getdistrSimOption}. These include
\begin{itemize}
\item \code{MaxNumberofPlottedObs}: the maximal number of observations plotted in a
      plot of an object of class \code{Dataclass}; defaults to 4000.
\item \code{MaxNumberofPlottedObsDims}: the maximal number of observation dimensions
      to be plotted in a plot of an object of class \code{Dataclass} and descendants;
      defaults to 6.
\item \code{MaxNumberofPlottedRuns}: the maximal number of runs to be plotted in a
      plot of an object of class \code{Dataclass} and descendants (one run/panel);
      defaults to 6.
\item \code{MaxNumberofSummarizedObsDims}: the maximal number of observation
      dimensions to be summarized of an object of class \code{Dataclass} and
      descendants; defaults to 6.
\item \code{MaxNumberofSummarizedRuns}: the maximal number of runs to be summarized
      of an object of class \code{Dataclass} and descendants; defaults to 6.
\end{itemize}
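As announced above, the following minimal sketch illustrates the common accessor
pattern of these option mechanisms. It only uses accessor/replacement functions and
option names taken from the lists in this section; the particular values set are for
illustration only.
<>=
require(distrMod)  ## also makes distr and distrEx available
require(distrSim)
## inspect a single option (returns the value only)
getdistrOption("TruncQuantile")
## change it and restore the old value afterwards
oldeps <- getdistrOption("TruncQuantile")
distroptions("TruncQuantile" = 1e-8)
getdistrOption("TruncQuantile")
distroptions("TruncQuantile" = oldeps)
## the analogous accessors of the other packages
getdistrExOption("MCIterations")
getdistrSimOption("MaxNumberofPlottedObs")
getdistrModOption("show.details")
distrModoptions("show.details" = "minimal")
distrModoptions("show.details" = "maximal")   ## back to the default
@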
%
\subsection[Options for distrTEst]{Options for \pkg{distrTEst}}
Just as with the \code{distroptions}/\code{getdistrOption} commands, you may specify
certain global output options to be used within the package with
\code{distrTEstoptions}/\linebreak[4]\code{getdistrTEstOption}. These include
\begin{itemize}
\item \code{MaxNumberofPlottedEvaluations}: the maximal number of evaluations to be
      plotted in a plot of an object of class \code{EvaluationList}; defaults to 6
\item \code{MaxNumberofPlottedEvaluationDims}: the maximal number of evaluation
      dimensions to be plotted in a plot of an object of class \code{Evaluation};
      defaults to 6
\item \code{MaxNumberofSummarizedEvaluations}: the maximal number of evaluations to
      be summarized of an object of class \code{EvaluationList}; defaults to 15
\item \code{MaxNumberofPrintedEvaluations}: the maximal number of evaluations printed
      of an object of class \code{EvaluationList}; defaults to 15
\end{itemize}

\section{Further Documentation}
\subsection{Help pages}
Additional information can be obtained during an {\sf R} session, using help files
and startup messages.
\paragraph{Startup messages}
Upon loading packages from the {\tt distrXXX} family of packages with \code{require}
or \code{library}, by default startup messages are displayed which give some starting
points where to look for further information; how to scale/switch off these startup
messages is discussed in the next section.
\paragraph{Package help file}
A starting point doubtless is given by the package help files, called e.g.\ by
\code{?distr}. There we give an {\tt ASCII} form of a class graph with all the
{\tt S4} classes in the corresponding package, and indicate the implemented methods
and functions. From version 2.1 on, the help file for \pkg{distr} has a new section
``Extension packages'' pointing to the various extensions of this package.
\paragraph{Individual help files}
Of course, for looking up the syntax of a function, the help pages available with
\code{?} / \code{help} are extremely useful. Note that for our packages, help pages
are also available for internal (i.e., non-exported) functions.
\subsection[NEWS file]{{\tt NEWS} file}
To get more details on the implementation process you may also consult the
corresponding {\tt NEWS} file, where all relevant changes are listed under the
package version in which they were implemented. You may inspect this {\tt NEWS} file
with \code{NEWS()}.
\subsection{Vignettes}
Besides this large (or huge) vignette, there are also some minor vignettes available,
assembling documentation somewhat more coherently than the help pages do. From
version 2.1 on, e.g., there is a new vignette ``How to generate new distributions in
packages distr, distrEx'' in package \pkg{distr} which is to encourage the
implementation of new distributions and distribution classes by third parties.
\subsection{Articles}
Of course we plan to publish some of our findings in peer-reviewed journals; the part
of this vignette dealing with package \pkg{distrMod} will form the basis for an
article on this package by the present authors.
\section{Startup Messages}
%
For the management of startup messages, from version~1.7 on, we use package
\pkg{startupmsg}: when loading/attaching the packages \pkg{distr}, \pkg{distrEx},
\pkg{distrSim}, or \pkg{distrTEst}, for each package a disclaimer is displayed.
You may suppress these start-up banners/messages completely by setting
\linebreak[4]\code{options("StartupBanner"="off")} somewhere before loading the
package by \code{library} or \code{require} in your R code / R session.
If option \code{"StartupBanner"} is not defined (the default), or if you set
\linebreak[4]\code{options("StartupBanner" = NULL)} or
\code{options("StartupBanner" = "complete")}, the complete start-up banner is
displayed.
For any other value of option \code{"StartupBanner"} (i.e., a value not in
\linebreak[3]\code{c(NULL, "off", "complete")}) only the version information is
displayed. The same can be achieved by wrapping the \code{library} or \code{require}
call into either \linebreak[4]\code{onlytypeStartupMessages(, atypes="version")}
or\linebreak[4] \code{suppressStartupMessages()}.
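The following minimal sketch shows these mechanisms in action; it only uses the calls
named in the preceding paragraph, and the choice of packages attached is arbitrary.
<>=
## switch the start-up banners of the distrXXX packages off completely
options("StartupBanner" = "off")
require(distr)
## back to the default behavior, then restrict one call to the version information
options("StartupBanner" = NULL)
onlytypeStartupMessages(require(distrEx), atypes = "version")
## suppress all start-up messages of a single call
suppressStartupMessages(require(distrSim))
@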
%
\section[System/version requirements]{System/version requirements, license, etc.\ }
%
\subsection{System requirements}
%
As our package is completely written in {\sf R}, there are no dependencies on the
underlying OS; of course, there is the usual speed gain possible on recent machines.
We have tested our package on a Pentium~II with 233 MHz, on Pentium~III's with
0.8--2.1 GHz, and on an Athlon with 2.5 GHz, giving reasonable performance.
\subsection{Required version of {\sf R}}
Contrary to the modest hardware requirements, if you want to attach \pkg{distr} by
\code{library} or \code{require} within {\sf R} code, you need at least {\sf R}
version {\tt 1.8.1}, as we make use of name space operations only available from that
version on; also, the command \code{setClassUnion}, which is used in some sources, is
only available from that version on.\\
%
On the other hand, if the package code is pasted in by \code{source}, it works with
{\sf R} from version {\tt 1.7.0} on ---but without name spaces, which are only
available from version {\tt 1.8.0} on. Due to some changes in {\sf R} from version
{\tt 1.8.1} to {\tt 1.9.0} and from {\tt 1.9.1} to {\tt 2.0.0}, we have to provide
different {\tt zip}/{\tt tar.gz} files for these versions.\\
Versions of \pkg{distr} from version number {\tt 1.5} onwards are only supplied for
{\sf R} version {\tt 2.0.1 patched} and later. After a reorganization, versions of
\pkg{distr} from version number {\tt 1.6} onwards are only supplied for {\sf R}
version {\tt 2.2.0 patched} and later.
\subsection{Dependencies}
In package \pkg{distr}, from version 2.0 on, we make use of \code{D1ss} from
\href{mailto:maechler@stat.math.ethz.ch}{Martin M\"achler's} package \pkg{sfsmisc}.
In package \pkg{distrEx}, up to version 2.4, we needed
\href{mailto:alec_stephenson@hotmail.com}{Alec Stephenson's} package \pkg{evd} for
the extreme value distributions implemented therein, as well as
\href{mailto:Vincent.Goulet@act.ulaval.ca}{Vincent Goulet}'s and Mathieu Pigeon's
package \pkg{actuar} for the (single parameter) Pareto distribution (from
\pkg{distrEx} version 2.1 on), which has been ported to our framework by
\href{mailto:nhorbenko@kpmg.de}{Nataliya Horbenko}. From version 2.4 on, this
infrastructure has moved to package \pkg{RobExtremes}.
In package \pkg{distrSim}, and consequently also in package \pkg{distrTEst}, we use
\href{mailto:pgilbert@bank-banque-canada.ca}{Paul Gilbert's} package \pkg{setRNG}, to
be installed from \href{https://cran.r-project.org/mirrors.html}{\tt CRAN}, for the
control of the seed of the random number generator in our simulation classes. More
precisely, for our version $\le$ {\tt 1.6} we need his version $<$ {\tt 2006.2-1},
and for our version $\ge$ {\tt 1.7} we need his version $\ge$ {\tt 2006.2-1}.
From package version {\tt 1.7}/{\tt 0.4-3} on, we also need package \pkg{startupmsg}
by the first of the present authors, which also is available on
\href{https://cran.r-project.org/mirrors.html}{\tt CRAN}.
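To illustrate the role of \pkg{setRNG} just mentioned: the \code{seed} slot of our
simulation classes takes an RNG state as returned by \code{setRNG()}. The following
sketch fixes such a state explicitly so that a simulation becomes reproducible; the
particular \code{setRNG} arguments and the file name used here are assumptions based
on the \pkg{setRNG} documentation, not prescribed by \pkg{distrSim}.
<>=
require(distrSim)  ## pulls in distr and setRNG
## fix the RNG explicitly, then record the state for the seed slot
setRNG(kind = "Mersenne-Twister", seed = 123, normal.kind = "Inversion")
myseed <- setRNG()  ## the state just set
sim <- new("Simulation", seed = myseed,
           distribution = Norm(mean = 0, sd = 1),
           filename = "sim_seed_demo",  ## hypothetical file name
           runs = 10, samplesize = 20)
simulate(sim)
sim   ## re-running this chunk reproduces exactly the same data
@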
\subsection{License}
This software is distributed under the terms of the GNU Lesser General Public
License, version 3 ({\tt LGPL-3}); confer\newline
\href{https://www.gnu.org/copyleft/gpl.html}%
{\footnotesize \tt https://www.gnu.org/copyleft/gpl.html},
\href{https://en.wikipedia.org/wiki/GNU_Lesser_General_Public_License}%
{\footnotesize\tt \url{https://en.wikipedia.org/wiki/GNU_Lesser_General_Public_License}}
\section{Details to the implementation}
\begin{itemize}
\item As the normal distribution is closed under affine transformations, we have
      overloaded the corresponding methods.
%The same goes for the exponential distribution under scale transformations.
\item For the general convolution algorithm for univariate probability distribution
      functions/densities by means of FFT, which we use in the overloaded
      {\tt "+"}-operator, confer~\cite{K:R:S:04}.
\item Exact convolution methods are implemented for the normal, the Poisson, the
      binomial, the negative binomial, the Gamma (and the \code{Exp}), and the
      {$\chi^2$} distribution.
\item Exact formulae for scale transformations are implemented for the
      Exp-/Gamma-distribution, the Weibull and the log-normal distribution (the
      latter two from version 1.9 on).
\item Exact formulae for affine linear transformations are available for the normal,
      the logistic and the Cauchy distribution (the latter two from version 1.9 on).
\item Instances of any class transparent to the user are initialized
      by\linebreak[4] \code{new("<class>", ...)} (or by the corresponding generating
      function), where except for class \code{DataClass} in package \pkg{distrSim}
      all classes have default values for all their slots; in \code{DataClass}, the
      slot \code{Data} has to be specified.
\item Multiplication (and division) is implemented by means of corresponding
      exponentials of the convolution of the logarithms (evaluated separately for
      positive and negative parts).
\item Exponentiation also uses this exp-log trick.
\item Multiplication, exponentiation, and minimum/maximum of an
      \code{AbscontDistribution} and a \code{DiscreteDistribution} as an intermediate
      step produce a \code{UnivarMixingDistribution}, with one mixing component for
      each element of the support of the \code{DiscreteDistribution}. As a last step,
      this \code{UnivarMixingDistribution} is then ``flattened''.
\item As suggested in \cite{OOPGent}, all slots are accessed and modified by
      corresponding accessor and replacement functions ---templates for which were
      produced by \code{standardMethods}.
      {\bf We strongly discourage the use of the \code{@}-operator to modify or even
      access slots \code{r}, \code{d}, \code{p}, and \code{q}, confer
      Example~{\ref{destrex}}.}
\end{itemize}
%
\section{A general utility}
%
Following \cite{OOPGent}, the programmer of {\tt S4}-classes should provide accessor
and replacement functions for the inspection/modification of any newly introduced
slot. This can be quite a task when you have a lot of classes/slots. As these
functions all have the same structure, it would be nice to automatically generate
templates for them. Faced with this problem in developing this package, Thomas Stabla
has written such a utility, \code{standardMethods} ---which the authors of this
package recommend to any developer of {\tt S4}-classes. For more details, see
\code{?standardMethods}.
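For a quick impression of what \code{standardMethods} produces, consider the
following minimal sketch. It merely defines a toy {\tt S4} class and asks for the
accessor/replacement templates of its slots; in line with \code{?standardMethods},
we assume here that the class name is passed as a character string and that the
templates are printed to the console by default.
<>=
require(distr)
## a toy S4 class with a single slot (illustration only)
setClass("ToyParameter", representation(rate = "numeric"))
## print templates for accessor and replacement functions
## for all slots of "ToyParameter" (here: slot 'rate')
standardMethods("ToyParameter")
@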
\section{Odds and Ends}
\subsection[What should be done and what we could do]%
{What should be done and what we could do ---for version $>$\pkgversion}
\begin{itemize}
\item application of analytic Fourier transforms instead of FFT where appropriate
      ---perhaps also to be controlled by a parameter/option
\item use of the \code{q}-slot applied to \code{runif} in \code{simplifyr} for
      continuous distributions
\item further exact formulae for binary arithmetic operations like \code{"*"}
%\item derivation of a class \code{LatticeDistribution} from
%\code{DiscreteDistribution} to be able to easily apply FFT
%\item redo the math-method for discrete distributions when only slot \code{r}
%is given
%\item better documentation for method \code{evaluate} / class \code{Evaluation}
%\item adaptation of class \code{Evaluation} / method \code{evaluate} to be more
%flexible
%\item use of \code{initialize} in generating functions
%\item generating function for new distribution classes to ease inclusion of
%new distributions
\item goodness-of-fit tests for distribution objects
%\item use of $\tt \backslash$\code{S4method} in documentation
%\item overloading binary operators of group \code{Math2} for independent
%distributions
\item defining a subgroup of \code{Math2} of invertible binary operators
%%\item better use of \code{concept} in rd-files
%\item suggestion by \href{mailto:kouros.owzar@duke.edu}{Kouros Owzar}:
%in case of parameters of dimension $p>1$
%---as in case of ${\cal N}(\mu,\sigma^2)$--- possibility to
%access/replace that parameter as a vector
\end{itemize}
\subsection{What should be done but for which we lack the know-how}
\begin{itemize}
\item multivariate distributions
\item conditional distributions
\item copulas
\end{itemize}
%
\section{Acknowledgement}
In order to give our acknowledgements their due place in the manual, we insert them
before the rather extensive presentation of examples, because otherwise they would
probably be overlooked by most readers.
We thank Martin M\"achler and Josef Leydold for their helpful suggestions in
conceiving the package. John Chambers also gave several helpful hints and insights
when responding to our requests concerning the {\tt S4}-class concept on
{\tt r-devel}/{\tt r-help}. We got stimulating replies to an RFC on {\tt r-devel} by
Duncan Murdoch and Gregory Warnes. We also thank Paul Gilbert for drawing our
attention to his package {\tt setRNG} and for making it available as a stand-alone
version. In the last few days before the release on {\tt CRAN}, Kurt Hornik and Uwe
Ligges were very kind in helping us find out how to pass all necessary checks by
{\tt R CMD check}. We also thank G.~Jay Kerns for contributing code for the skewness
and kurtosis functionals. Last but not least, a big ``thank you'' to Torsten Hothorn,
as editor of {\tt R-News}, for his patience with our endless versions until we
finally came to a publishable version.
%\input{distrack}
%
\section{Examples}
\subsection{$12$-fold convolution of uniform $(0,1)$ variables}
\begin{small}
This example shows how easily we may get the distribution of the sum of $12$
${\rm i.i.d.}$ ${\rm unif}(0,1)$--variables minus $6$ --- which was used as a fast
generator of ${\cal N}(0,1)$--variables in times when evaluations of $\exp$, $\log$,
$\sin$ and $\tan$ were expensive, confer \cite{Ric:88}, example~C, p.~163. The user
should not be confused by expressions like {\tt U+U}: this {\em does not\/} mean
{\tt 2U} but rather the convolution of two independent identically distributed random
variables.
\end{small}
<>=
require(distr)
N <- Norm(0,1)
U <- Unif(0,1)
U2 <- U + U
U4 <- U2 + U2
U8 <- U4 + U4
U12 <- U4 + U8
NormApprox <- U12 - 6
x <- seq(-4,4,0.001)
opar <- par(no.readonly = TRUE)
par(mfrow = c(2,1))
plot(x, d(NormApprox)(x), type = "l",
     xlab = "", ylab = "Density",
     main = "Exact and approximated density")
lines(x, d(N)(x), col = "red")
legend("topleft",
       legend = c("NormApprox", "Norm(0,1)"),
       fill = c("black", "red"))
plot(x, d(NormApprox)(x) - d(N)(x), type = "l",
     xlab = "", ylab = "\"black\" - \"red\"",
     col = "darkgreen", main = "Error")
lines(c(-4,4), c(0,0))
par(opar)
@
\subsection{Comparison of exact convolution to FFT for normal distributions}\label{Convex}
\begin{small}
This example illustrates the exactness of the numerical algorithm used to compute the
convolution: we know that ${\cal L}({\tt A+B})={\cal N}(5,13)$ --- if the second
argument of ${\cal N}$ is taken to be the variance.
\end{small}
<>=
require(distr)
## initialize two normal distributions
A <- Norm(mean=1, sd=2)
B <- Norm(mean=4, sd=3)
## convolution via addition of moments
AB <- A+B
## casting of A,B as absolutely continuous distributions
## that is, ``forget'' that A,B are normal distributions
A1 <- as(A, "AbscontDistribution")
B1 <- as(B, "AbscontDistribution")
## for higher precision we change the global variable
## "TruncQuantile" from 1e-5 to 1e-8
oldeps <- getdistrOption("TruncQuantile")
eps <- 1e-8
distroptions("TruncQuantile" = eps)
## support of A1+B1 for FFT convolution is
## [q(A1)(TruncQuantile),
##  q(B1)(TruncQuantile, lower.tail = FALSE)]
## convolution via FFT
AB1 <- A1+B1
#############################
## plots of the results
#############################
par(mfrow=c(1,3))
low <- q(AB)(1e-15)
upp <- q(AB)(1e-15, lower.tail = FALSE)
x <- seq(from = low, to = upp, length = 10000)
## densities
plot(x, d(AB)(x), type = "l", lwd = 5)
lines(x, d(AB1)(x), col = "orange", lwd = 1)
title("Densities")
legend("topleft", legend=c("exact", "FFT"), fill=c("black", "orange"))
## cdfs
plot(x, p(AB)(x), type = "l", lwd = 5)
lines(x, p(AB1)(x), col = "orange", lwd = 1)
title("CDFs")
legend("topleft", legend=c("exact", "FFT"), fill=c("black", "orange"))
## quantile functions
x <- seq(from = eps,
         to = 1-eps, length = 1000)
plot(x, q(AB)(x), type = "l", lwd = 5)
lines(x, q(AB1)(x), col = "orange", lwd = 1)
title("Quantile functions")
legend("topleft", legend=c("exact", "FFT"), fill=c("black", "orange"))

## Since the plots of the results show no
## recognizable differences, we also compute
## the total variation distance of the densities
## and the Kolmogorov distance of the cdfs

## total variation distance of densities
total.var <- function(z, N1, N2){
    0.5*abs(d(N1)(z) - d(N2)(z))
}
dv <- integrate(total.var, lower=-Inf, upper=Inf,
                rel.tol=1e-8, N1=AB, N2=AB1)
cat("Total variation distance of densities:\t")
print(dv) # 4.25e-07
### meanwhile realized in package "distrEx"
### as TotalVarDist(N1,N2)

## Kolmogorov distance of cdfs
## the distance is evaluated on a random grid
z <- r(Unif(Min=low, Max=upp))(1e5)
dk <- max(abs(p(AB)(z)-p(AB1)(z)))
cat("Kolmogorov distance of cdfs:\t", dk, "\n") # 2.03e-07
### meanwhile realized in package "distrEx"
### as KolmogorovDist(N1,N2)

## restore the old distroptions
distroptions("TruncQuantile" = oldeps)
@
\subsection{Comparison of FFT to {\tt RtoDPQ}}\label{compex}
\begin{small}
This example illustrates the exactness (or rather not--so--exactness) of the
simulational default algorithm used to compute the distribution of transformations
from the {\tt Math} group.
\end{small}
<>=
require(distr)
################################
## Comparison 1 - FFT and RtoDPQ
################################
N1 <- Norm(0,3)
N2 <- Norm(0,4)
rnew1 <- function(n) r(N1)(n) + r(N2)(n)
X <- N1 + N2                               # exact formula -> N(0,5)
Y <- N1 + as(N2, "AbscontDistribution")    # approximated with FFT
Z <- new("AbscontDistribution", r = rnew1) # approximated with RtoDPQ
# density-plot
x <- seq(-15,15,0.01)
plot(x, d(X)(x), type = "l", lwd = 3, xlab = "", ylab = "density",
     main = "Comparison 1", col = "black")
lines(x, d(Y)(x), col = "yellow")
lines(x, d(Z)(x), col = "red")
legend("topleft",
       legend = c("Exact", "FFT-Approximation", "RtoDPQ-Approximation"),
       fill = c("black", "yellow", "red"))
############################################
## Comparison 2 - "Exact" Formula and RtoDPQ
############################################
B <- Binom(size = 6, prob = 0.5) * 10
N <- Norm()
rnew2 <- function(n) r(B)(n) + r(N)(n)
Y <- B + N                                 # "exact" formula
Z <- new("AbscontDistribution", r = rnew2) # approximated with RtoDPQ
# density-plot
x <- seq(-5,65,0.01)
plot(x, d(Y)(x), type = "l", xlab = "", ylab = "density",
     main = "Comparison 2", col = "black")
lines(x, d(Z)(x), col = "red")
legend("topleft", legend = c("Exact", "RtoDPQ-Approximation"),
       fill = c("black", "red"))
@
\subsection{Comparison of exact and approximate stationary regressor distribution}\label{statex}
\begin{small}
Another illustration of the use of package {\tt "distr"}.
In the case of a stationary AR(1) model with a non-normal innovation distribution,
the stationary distribution of the observations has to be approximated by finite
convolutions. That these approximations give fairly good results already for small
orders is exemplified by the Gaussian case, where we may compare the approximation to
the exact stationary distribution.
\end{small}
<>=
require(distr)
## Approximation of the stationary regressor
## distribution of an AR(1) process
##      X_t = phi X_{t-1} + V_t
## where V_t i.i.d. N(0,1) and phi in (0,1)
## We obtain
##      X_t = \sum_{j=1}^\infty phi^j V_{t-j}
## i.e., X_t ~ N(0, 1/(1-phi^2))
phi <- 0.5
## casting of V as an absolutely continuous distribution,
## that is, ``forget'' that V is a normal distribution
V <- as(Norm(), "AbscontDistribution")
## for higher precision we change the global variable
## "TruncQuantile" from 1e-5 to 1e-8
oldeps <- getdistrOption("TruncQuantile")
eps <- 1e-8
distroptions("TruncQuantile" = eps)
## Computation of the approximation
##      H = \sum_{j=1}^n phi^j V_{t-j}
## of the stationary regressor distribution
## (via convolution using FFT)
H <- V
n <- 15
## may take some time
### switch off warnings [would be issued due to
### very unequal variances...]
old.warn <- getOption("warn")
options("warn" = -1)
for(i in 1:n){ Vi <- phi^i*V; H <- H + Vi }
options("warn" = old.warn)
## the stationary regressor distribution (exact)
X <- Norm(sd=sqrt(1/(1-phi^2)))
#############################
## plots of the results
#############################
par(mfrow=c(1,3))
low <- q(X)(1e-15)
upp <- q(X)(1e-15, lower.tail = FALSE)
x <- seq(from = low, to = upp, length = 10000)
## densities
plot(x, d(X)(x), type = "l", lwd = 5)
lines(x, d(H)(x), col = "orange", lwd = 1)
title("Densities")
legend("topleft", legend=c("exact", "FFT"), fill=c("black", "orange"))
## cdfs
plot(x, p(X)(x), type = "l", lwd = 5)
lines(x, p(H)(x), col = "orange", lwd = 1)
title("CDFs")
legend("topleft", legend=c("exact", "FFT"), fill=c("black", "orange"))
## quantile functions
x <- seq(from = eps, to = 1-eps, length = 1000)
plot(x, q(X)(x), type = "l", lwd = 5)
lines(x, q(H)(x), col = "orange", lwd = 1)
title("Quantile functions")
legend("topleft", legend=c("exact", "FFT"), fill=c("black", "orange"))

## Since the plots of the results show no
## recognizable differences, we also compute
## the total variation distance of the densities
## and the Kolmogorov distance of the cdfs

## total variation distance of densities
total.var <- function(z, N1, N2){
    0.5*abs(d(N1)(z) - d(N2)(z))
}
dv <- integrate(f = total.var, lower = -Inf, upper = Inf,
                rel.tol = 1e-7, N1=X, N2=H)
cat("Total variation distance of densities:\t")
print(dv) # ~ 5.0e-06
### meanwhile realized in package "distrEx"
### as TotalVarDist(N1,N2)

## Kolmogorov distance of cdfs
## the distance is evaluated on a random grid
z <- r(Unif(Min=low, Max=upp))(1e5)
dk <- max(abs(p(X)(z)-p(H)(z)))
cat("Kolmogorov distance of cdfs:\t", dk, "\n") # ~ 2.5e-06
### meanwhile realized in package "distrEx"
### as KolmogorovDist(N1,N2)

## restore the old distroptions
distroptions("TruncQuantile" = oldeps)
@
\subsection{Truncation and Huberization/winsorization}\label{truncex}
This example has been integrated into the package itself; see
Section~\ref{TruncMin}.
\subsection{Distribution of minimum and maximum of two independent random
variables}\label{minmaxex}
This example has also been integrated into the package itself; see
Section~\ref{TruncMin}.
\subsection{Instructive destructive example}\label{destrex}
<>=
##########################################################
## Demo: Instructive destructive example
##########################################################
require(distr)

## package "distr" encourages
## consistency but does not
## enforce it---so in general
##    d o   n o t   m o d i f y
## slots d, p, q, r!

N <- Norm()
B <- Binom()
N@d <- B@d
plot(N, lwd = 3)
@
\subsection{A simulation example}\label{simex}
{\bf needs packages \pkg{distrSim}/\pkg{distrTEst}}\\[2ex]
<>=
require(distrTEst) ### also loads distrSim
sim <- new("Simulation",
           seed = setRNG(),
           distribution = Norm(mean = 0, sd = 1),
           filename = "sim_01",
           runs = 1000,
           samplesize = 30)
contsim <- new("Contsimulation",
               seed = setRNG(),
               distribution.id = Norm(mean = 0, sd = 1),
               distribution.c = Norm(mean = 0, sd = 9),
               rate = 0.1,
               filename = "contsim_01",
               runs = 1000,
               samplesize = 30)
simulate(sim)
simulate(contsim)
sim
summary(contsim)
plot(contsim)
@
<>=
require(distrTEst)
psim <- function(theta, y, m0){
    mean(pmin(pmax(-m0, y - theta), m0))
}
mestimator <- function(x, m = 0.7) {
    uniroot(f = psim,
            lower = -20, upper = 20,
            tol = 1e-10,
            y = x, m0 = m, maxiter = 20)$root
}
result.id.mean <- evaluate(sim, mean)
result.id.mest <- evaluate(sim, mestimator)
result.id.median <- evaluate(sim, median)
result.cont.mean <- evaluate(contsim, mean)
result.cont.mest <- evaluate(contsim, mestimator)
result.cont.median <- evaluate(contsim, median)
elist <- EvaluationList(result.cont.mean,
                        result.cont.mest,
                        result.cont.median)
elist
summary(elist)
plot(elist, cex = 0.7, las = 2)
@
\par
\begin{footnotesize}
Output of the \code{plot}/\code{show}-method for an object of class \code{Evaluation}
<>=
result.cont.mest
@
\end{footnotesize}
\begin{footnotesize}
Output of the \code{summary}-method for an object of class \code{EvaluationList}
<>=
summary(elist)
@
\end{footnotesize}
\begin{small}
In this example we present a standard robust simulation study that --- in variations
--- arises in almost every paper on Robust Statistics. We do this with the tools
provided by our packages\ldots
\end{small}
\subsection{Expectation of a given function under a given distribution}
\begin{footnotesize}
This code is for illustration only; in the meantime, the expectation and variance
operators implemented in this example have been included in package \pkg{distrEx},
where their functionality has been further extended.
\end{footnotesize}
\begin{small}
As in examples~\ref{truncex} and \ref{minmaxex}, we illustrate the use of package
{\tt "distr"} by implementing a general evaluation of expectation and variance under
a given distribution.
\end{small}
<>=
require("distrEx")
# Example
id <- function(x) x
sq <- function(x) x^2
# Expectation and Variance of Binom(6,0.5)
B <- Binom(6, 0.5)
E(B, id)
E(B, sq) - E(B, id)^2
# Expectation and Variance of Norm(1,1)
N <- Norm(1, 1)
E(N, id)
E(N, sq) - E(N, id)^2
@
\subsection{$n$-fold convolution of absolutely continuous distributions}\label{exe10}
\begin{small}
This might be useful for teaching the CLT: a straightforward implementation of the
$n$-fold convolution of an arbitrary absolutely continuous distribution implemented
in the package --- to show the accuracy of our method, we compare it to the exact
formula valid for the $n$-fold convolution of normal distributions.\\
From version 1.9 on, this is integrated into package \pkg{distr}.
\end{small}
<>=
##########################################################
## Demo: n-fold convolution of absolutely continuous
## probability distributions
##########################################################
require(distr)
if(!isGeneric("convpow"))
   setGeneric("convpow",
              function(D1, ...) standardGeneric("convpow"))
##########################################################
## Function for n-fold convolution
## -- absolutely continuous distribution --
##########################################################
## implementation of Algorithm 3.4 of
#  Kohl, M., Ruckdeschel, P., Stabla, T. (2005):
#  General purpose convolution algorithm for distributions
#  in S4-Classes by means of FFT.
#  Technical report, Feb. 2005.
#  Also available in
#  http://www.uni-bayreuth.de/departments/math/org/mathe7/
#  /RUCKDESCHEL/pubs/comp.pdf
setMethod("convpow", signature(D1 = "AbscontDistribution"),
   function(D1, N){
       if((N < 1)||(!identical(floor(N), N)))
          stop("N has to be a natural number greater than 0")
       m <- getdistrOption("DefaultNrFFTGridPointsExponent")

       ##STEP 1
       lower <- ifelse((q(D1)(0) > - Inf), q(D1)(0),
                       q(D1)(getdistrOption("TruncQuantile")))
       upper <- ifelse((q(D1)(1) < Inf), q(D1)(1),
                       q(D1)(getdistrOption("TruncQuantile"),
                             lower.tail = FALSE))

       ##STEP 2
       M <- 2^m
       h <- (upper-lower)/M
       if(h > 0.01)
          warning(paste("Grid for approxfun too wide, ",
                        "increase DefaultNrFFTGridPointsExponent", sep=""))
       x <- seq(from = lower, to = upper, by = h)
       p1 <- p(D1)(x)

       ##STEP 3
       p1 <- p1[2:(M + 1)] - p1[1:M]

       ##STEP 4
       ## computation of DFT
       pn <- c(p1, numeric((N-1)*M))
       fftpn <- fft(pn)

       ##STEP 5
       ## convolution theorem for DFTs
       pn <- Re(fft(fftpn^N, inverse = TRUE)) / (N*M)
       pn <- (abs(pn) >= .Machine$double.eps)*pn
       i.max <- N*M-(N-2)
       pn <- c(0,pn[1:i.max])
       dn <- pn / h
       pn <- cumsum(pn)

       ##STEP 6(density)
       ## density
       x <- c(N*lower,
              seq(from = N*lower+N/2*h, to = N*upper-N/2*h, by = h),
              N*upper)
       dnfun1 <- approxfun(x = x, y = dn, yleft = 0, yright = 0)

       ##STEP 7(density)
       standardizer <- sum(dn[2:i.max]) + (dn[1]+dn[i.max+1]) / 2
       dnfun2 <- function(x) dnfun1(x) / standardizer

       ##STEP 6(cdf)
       ## cdf with continuity correction h/2
       pnfun1 <- approxfun(x = x+0.5*h, y = pn,
                           yleft = 0, yright = pn[i.max+1])

       ##STEP 7(cdf)
       pnfun2 <- function(x) pnfun1(x) / pn[i.max+1]

       ## quantile with continuity correction h/2
       yleft  <- ifelse(q(D1)(0) == -Inf, -Inf, N*lower)
       yright <- ifelse(q(D1)(1) ==  Inf,  Inf, N*upper)
       w0 <- options("warn")
       options(warn = -1)
       qnfun1 <- approxfun(x = pnfun2(x+0.5*h), y = x+0.5*h,
                           yleft = yleft, yright = yright)
       qnfun2 <- function(x){
           ind1 <- (x == 0)*(1:length(x))
           ind2 <- (x == 1)*(1:length(x))
           y <- qnfun1(x)
           y <- replace(y, ind1[ind1 != 0], yleft)
           y <- replace(y, ind2[ind2 != 0], yright)
           return(y)
       }
       options(w0)

       ## random number generator: row sums of n draws of N i.i.d. copies of D1
       rnew <- function(n)
           apply(matrix(r(D1)(n*N), ncol = N), 1, sum)

       return(new("AbscontDistribution", r = rnew, d = dnfun1,
                  p = pnfun2, q = qnfun2))
   })

## initialize a normal distribution
A <- Norm(mean=0, sd=1)
## convolution power
N <- 10
## convolution via FFT
AN <- convpow(as(A,"AbscontDistribution"), N)
## ... for the normal distribution, 'convpow' has an "exact"
## method from version 1.9 on, so the as(.,.) is needed to
## see how the algorithm above works
## exact convolution
AN1 <- Norm(mean=0, sd=sqrt(N))
## plots of the results
eps <- getdistrOption("TruncQuantile")
par(mfrow=c(1,3))
low <- q(AN1)(eps)
upp <- q(AN1)(eps, lower.tail = FALSE)
x <- seq(from = low, to = upp, length = 10000)
## densities
plot(x, d(AN1)(x), type = "l", lwd = 5)
lines(x, d(AN)(x), col = "orange", lwd = 1)
title("Densities")
legend("topleft", legend=c("exact", "FFT"), fill=c("black", "orange"))
## cdfs
plot(x, p(AN1)(x), type = "l", lwd = 5)
lines(x, p(AN)(x), col = "orange", lwd = 1)
title("CDFs")
legend("topleft", legend=c("exact", "FFT"), fill=c("black", "orange"))
## quantile functions
x <- seq(from = eps, to = 1-eps, length = 1000)
plot(x, q(AN1)(x), type = "l", lwd = 5)
lines(x, q(AN)(x), col = "orange", lwd = 1)
title("Quantile functions")
legend("topleft", legend = c("exact", "FFT"), fill = c("black", "orange"))
@
%-------------------------------------------------------------------------------
\begin{thebibliography}{8}

\bibitem{Beng:03} Bengtsson H.
(2003):
\newblock The {R.oo} package - object-oriented programming with references using
standard {R} code.
\newblock In: Hornik K., Leisch F. and Zeileis A. (Eds.) {\em Proceedings of the 3rd
International Workshop on Distributed Statistical Computing (DSC 2003)\/}. Vienna,
Austria.
\newblock Published as http://www.ci.tuwien.ac.at/Conferences/DSC-2003/

\bibitem{Cham:98} Chambers J.M. (1998):
\newblock {\em {Programming with data. A guide to the S language}\/}.
\newblock {Springer}.
\newblock https://statweb.stanford.edu/~jmc4/Sbook/

\bibitem{OOPGent} Gentleman R. (2003):
\newblock {\em Object Orientated Programming. Slides of a Short Course held in
Auckland\/}.
\newblock http://www.stat.auckland.ac.nz/S-Workshop/Gentleman/Methods.pdf

\bibitem{MK:05} Kohl M. (2005):
\newblock {\em Numerical Contributions to the Asymptotic Theory of Robustness\/}.
\newblock {Dissertation}, Universit{\"a}t Bayreuth.
\newblock See also http://stamats.de/ThesisMKohl.pdf

\bibitem{K:R:S:04} Ruckdeschel P. and Kohl M. (2014):
\newblock {General Purpose Convolution Algorithm for Distributions in S4-Classes by
means of FFT}.
\newblock {\em J. Statist. Software\/}, {\bf 59}(4): 1--25.

\bibitem{NumR:92} Press W.H., Teukolsky S.A., Vetterling W.T. and Flannery B.P.
(1992):
\newblock {\em {Numerical recipes in C. The art of scientific computing}\/}.
\newblock {Cambridge Univ. Press}, 2nd ed.

\bibitem{Ric:88} Rice J.A. (1988):
\newblock {\em {Mathematical statistics and data analysis}\/}.
\newblock The Wadsworth \& Brooks/Cole Statistics/Probability Series.
{Wadsworth \& Brooks/Cole Advanced Books \& Software}, Pacific Grove, California.

\bibitem{R:K:S:C:04} Ruckdeschel P., Kohl M., Stabla T., and Camphausen F. (2006):
\newblock {S4 Classes for Distributions.}
\newblock {\em R-News\/}, {\bf 6}(2): 10--13.
\newblock https://CRAN.R-project.org/doc/Rnews/Rnews\_2006-2.pdf

\end{thebibliography}
% -------------------------------------------------------------------------------
\end{document}
% -------------------------------------------------------------------------------