Compare commits

...

2 commits

Author SHA1 Message Date
Paul ALNET
07b4bec23e tex: NFDBP demo ! 2023-06-04 19:37:02 +02:00
Paul ALNET
4a4531a413 tex: NFBP stats 2023-06-04 18:39:18 +02:00

View file

@ -180,14 +180,75 @@ Mathematically, the NFBP algorithm imposes the following constraint on the first
We implemented the NFBP algorithm in Python \footnotemark, for its ease of use We implemented the NFBP algorithm in Python \footnotemark, for its ease of use
and broad recommendation. We used the \texttt{random} library to generate and broad recommendation. We used the \texttt{random} library to generate
random numbers between $ 0 $ and $ 1 $ and \texttt{matplotlib} to plot the random numbers between $ 0 $ and $ 1 $ and \texttt{matplotlib} to plot the
results in the form of histograms. We ran $ R = 10^6 $ simulations with results in the form of histograms.
$ N = 10 $ different items each.
\footnotetext{The code is available in Annex \ref{annex:probabilistic}} \footnotetext{The code is available in Annex \ref{annex:probabilistic}}
We will estimate $ \mathbb{E}[X] $ and $ \mathbb{E}[V] $ with the empirical mean
$ \overline{X_N} $, and measure the precision of that estimate with the
empirical variance $ {S_N}^2 $. This will be done for both $ R = 2 $ and
$ R = 10^6 $ simulations.
\[
\overline{X_N} = \frac{1}{N} \sum_{i=1}^{N} X_i
\]
As the true variance is unknown, we will use $ {S_N}^2 $ to estimate it
and then determine the 95\,\% confidence interval.
\begin{align*}
{S_N}^2 & = \frac{1}{N-1} \sum_{i=1}^{N} (X_i - \overline{X_N})^2 \\
IC_{95\%}(m) & = \left[ \overline{X_N} \pm \frac{S_N}{\sqrt{N}} \cdot t_{1 - \frac{\alpha}{2}, N-1} \right] \\
\end{align*}
\paragraph{2 simulations} We first ran $ R = 2 $ simulations to observe the
behavior of the algorithm and the low precision of the results.
% TODO graph T_i 2 sim
On this graph, we can see each value of $ T_i $. Our calculations have yielded
that $ \overline{T_1} = 1.0 $ and $ {S_N}^2 = 2.7 $. Our Student coefficient is
$ t_{0.975, 2} = 4.303 $.
\begin{align*}
\overline{T_1} = \frac{1}{2} \sum_{k=1}^{2} {T_1}_k & = 1.0 \\
IC_{95\%}(T_1) & = \left[ 1.0 \pm \frac{\sqrt{2.7}}{\sqrt{2}} \cdot 4.303 \right] \\
& = \left[ 1 \pm 5.0 \right] \\
\end{align*}
With two simulations, we obtain $ \overline{T_1} = 1.0 $.
IC observed
We then ran $ R = 10^6 $ simulations with $ N = 50 $ different items each.
% TODO With $ 10^6 $ simulations, we obtain $ \overline{X_N} = \ldots $ (cf. graph)
% TODO compute $ {S_N}^2 $
% TODO observed confidence interval (IC)
% TODO same for $ V $
% TODO graph of $ H $
\paragraph{Distribution of $ T_i $} We first studied how many items were \paragraph{Distribution of $ T_i $} We first studied how many items were
present per bin. present per bin.
% TODO sim of T_i
We determined the empirical mean to be
\[
\overline{T_i} = \frac{1}{20} \sum_{k=1}^{20} T_k = 1.5 \qquad \forall 1 \leq i \leq 20
\]
We can show
\paragraph{Distribution of $ V_i $} We then looked at the size of the first \paragraph{Distribution of $ V_i $} We then looked at the size of the first
item in each bin. item in each bin.
@ -262,7 +323,7 @@ bin. We have that
T_i = k \iff U_1 + U_2 + \ldots + U_{k-1} < 1 \text{ and } U_1 + U_2 + \ldots + U_{k} \geq 1 T_i = k \iff U_1 + U_2 + \ldots + U_{k-1} < 1 \text{ and } U_1 + U_2 + \ldots + U_{k} \geq 1
\end{equation} \end{equation}
Let $ A_k = \{ U_1 + U_2 + \ldots + U_{k-1} < 1 \}$. Hence, Let $ A_k = \{ U_1 + U_2 + \ldots + U_{k} < 1 \}$. Hence,
\begin{align*} \begin{align*}
% TODO = k % TODO = k
@ -271,61 +332,33 @@ Let $ A_k = \{ U_1 + U_2 + \ldots + U_{k-1} < 1 \}$. Hence,
& = P(A_{k-1}) - P(A_k) \qquad \text{ (as $ A_k \subset A_{k-1} $)} \\ & = P(A_{k-1}) - P(A_k) \qquad \text{ (as $ A_k \subset A_{k-1} $)} \\
\end{align*} \end{align*}
We will try to show that $ \forall k \geq 2 $, $ P(A_k) = \frac{1}{k!} $. To do We will try to show that $ \forall k \geq 1 $, $ P(A_k) = \frac{1}{k!} $. To do
so, we will use induction to prove the following proposition \eqref{eq:induction}, so, we will use induction to prove the following proposition \eqref{eq:induction},
$ \forall k \geq 2 $: $ \forall k \geq 1 $:
\begin{equation} \begin{equation}
\label{eq:induction} \label{eq:induction}
\tag{$ \mathcal{H}_k $} \tag{$ \mathcal{H}_k $}
P(U_1 + U_2 + \ldots + U_{k-1} < a) = \frac{a^k}{k!} \qquad \forall a \in [0, 1], P(U_1 + U_2 + \ldots + U_{k} < a) = \frac{a^k}{k!} \qquad \forall a \in [0, 1],
\end{equation} \end{equation}
Let us denote $ S_k = U_1 + U_2 + \ldots + U_{k-1} \qquad \forall k \geq 2 $. Let us denote $ S_k = U_1 + U_2 + \ldots + U_{k} \qquad \forall k \geq 1 $.
\paragraph{Base cases} $ k = 2 $ : $ P(U_1 < a) = a \neq \frac{a^2}{2}$ supposedly proving $ (\mathcal{H}_2) $. \paragraph{Base case} $ k = 1 $ : $ P(U_1 < a) = a = \frac{a^1}{1!}$, proving $ (\mathcal{H}_1) $.
$ k = 2 $ : \[ P(U_1 + U_2 < a) = \iint_{\cal{D}} f_{U_1, U_2}(x, y) \cdot (x + y) dxdy \] \paragraph{Induction step} Let $ k \geq 2 $. We assume $ (\mathcal{H}_{k-1}) $ is
true. We will show that $ (\mathcal{H}_{k}) $ is true.
Where $ \mathcal{D} = \{ (x, y) \in [0, 1]^2 \mid x + y < a \} $.
$ U_1 $ and $ U_2 $ are independent, so
\begin{align*}
f_{U_1, U_2}(x, y) & = f_{U_1}(x) \cdot f_{U_2}(y) \\
& = \begin{cases}
1 & \text{if } x \in [0, 1] \text{ and } y \in [0, 1] \\
0 & \text{otherwise} \\
\end{cases} \\
\end{align*}
Hence,
\begin{align*} \begin{align*}
P(U_1 + U_2 < a) P(S_k < a) & = P(S_{k-1} + U_k < a) \\
& = \iint_{\cal{D}} (x + y)dxdy \\ & = \iint_{\cal{D}} f_{S_{k-1}, U_k}(x, y) dxdy \\
& = \int_{0}^{a} \int_{0}^{a - x} (x + y) dy dx \\ \text{Where } \mathcal{D} & = \{ (x, y) \in [0, 1]^2 \mid x + y < a \} \\
& = \int_{0}^{a} \left[ xy + \frac{y^2}{2} \right]_{y=0}^{y=a - x} dx \\ & = \{ (x, y) \in [0, 1]^2 \mid 0 < x < a \text{ and } 0 < y < a - x \} \\
& = \int_{0}^{a} \left( ax - x^2 + \frac{a^2}{2} - ax + \frac{x^2}{2} \right) dx \\ P(S_k < a) & = \iint_{\cal{D}} f_{S_{k-1}}(x) \cdot f_{U_k}(y) dxdy \qquad
& = \int_{0}^{a} \left( \frac{a^2}{2} - \frac{x^2}{2} \right) dx \\ \text{because $ S_{k-1} $ and $ U_k $ are independent} \\
& = \left[ \frac{a^2 x}{2} - \frac{x^3}{6} \right]_{0}^{a} \\ & = \int_{0}^{a} f_{S_{k-1}}(x) \cdot \left( \int_{0}^{a-x} f_{U_k}(y) dy \right) dx \\
& = \frac{a^3}{2} - \frac{a^3}{6} \\
\end{align*} \end{align*}
\paragraph{Induction step} For a fixed $ k > 2 $, we assume that $
(\mathcal{H}_{k-1}) $ is true. We will try to prove $ (\mathcal{H}_{k}) $.
\[
P(S_{k-1} + U_{k-1} < a)
= \iint_{\cal{D}} f_{S_{k-1}, U_{k-1}}(x, y) \cdot (x + y) dxdy \\
\]
where $ \mathcal{D} = \{ (x, y) \in [0, 1]^2 \mid x + y < a \} $.
As $ S_{k-1} $ and $ U_{k-1} $ are independent,
\[
P(S_{k-1} + U_{k-1} < a)
= \iint_{\cal{D}} f_{S_{k-1}}(x) \cdot f_{U_{k-1}}(y) \cdot (x + y) dxdy \qquad \\
\]
$ (\mathcal{H}_{k-1}) $ gives us that $ \forall x \in [0, 1] $, $ (\mathcal{H}_{k-1}) $ gives us that $ \forall x \in [0, 1] $,
$ F_{S_{k-1}}(x) = P(S_{k-1} < x) = \frac{x^{k-1}}{(k-1)!} $. $ F_{S_{k-1}}(x) = P(S_{k-1} < x) = \frac{x^{k-1}}{(k-1)!} $.
@ -336,15 +369,25 @@ By differentiating, we get that $ \forall x \in [0, 1] $,
\] \]
Furthermore, $ U_{k-1} $ is uniformly distributed on $ [0, 1] $, so Furthermore, $ U_{k-1} $ is uniformly distributed on $ [0, 1] $, so
$ f_{U_{k-1}}(y) = 1 $. $ f_{U_{k-1}}(y) = 1 $. We can then integrate by parts :
\begin{align*} \begin{align*}
\text{Hence, } P(S_k < a)
P(S_{k-1} + U_{k-1} < a) & = \int_{0}^{a} f_{S_{k-1}}(x) \cdot \left( \int_{0}^{a-x} 1 dy \right) dx \\
& = & = \int_{0}^{a} f_{S_{k-1}}(x) \cdot (a - x) dx \\
& = \frac{a^{k}}{k!} & = a \int_{0}^{a} f_{S_{k-1}}(x) dx - \int_{0}^{a} x f_{S_{k-1}}(x) dx \\
& = a \int_0^a F'_{S_{k-1}}(x) dx - \left[ x F_{S_{k-1}}(x) \right]_0^a
+ \int_{0}^{a} F_{S_{k-1}}(x) dx \qquad \text{(IPP)} \\
& = a \left[ F_{S_{k-1}}(x) \right]_0^a - \left[ x F_{S_{k-1}}(x) \right]_0^a
+ \int_{0}^{a} \frac{x^{k-1}}{(k-1)!} dx \\
& = \left[ \frac{x^k}{k!} \right]_0^a \\
& = \frac{a^k}{k!} \\
\end{align*} \end{align*}
We have shown that $ (\mathcal{H}_{k}) $ is true, so by induction, $ \forall k \geq 1 $,
$ \forall a \in [0, 1] $, $ P(U_1 + U_2 + \ldots + U_{k} < a) = \frac{a^k}{k!} $. Take
$ a = 1 $ to get $ P(U_1 + U_2 + \ldots + U_{k} < 1) = \frac{1}{k!} $.