Compare commits
2 commits
1c6db889a6
...
07b4bec23e
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
07b4bec23e | ||
|
|
4a4531a413 |
1 changed files with 93 additions and 50 deletions
|
|
@ -180,14 +180,75 @@ Mathematically, the NFBP algorithm imposes the following constraint on the first
|
|||
We implemented the NFBP algorithm in Python\footnotemark, for its ease of use
|
||||
and broad recommendation. We used the \texttt{random} library to generate
|
||||
random numbers between $ 0 $ and $ 1 $ and \texttt{matplotlib} to plot the
|
||||
results in the form of histograms. We ran $ R = 10^6 $ simulations with
|
||||
$ N = 10 $ different items each.
|
||||
results in the form of histograms.
|
||||
|
||||
\footnotetext{The code is available in Annex~\ref{annex:probabilistic}.}
|
||||
|
||||
We will try to approximate $ \mathbb{E}[X] $ and $ \mathbb{E}[V] $ with $
|
||||
\overline{X_N} $ using $ {S_N}^2 $. This operation will be done for both $ R =
|
||||
2 $ and $ R = 10^6 $ simulations.
|
||||
|
||||
\[
|
||||
\overline{X_N} = \frac{1}{N} \sum_{i=1}^{N} X_i
|
||||
\]
|
||||
|
||||
As the variance value is unknown, we will use $ {S_N}^2 $ to estimate the
|
||||
variance and then determine the confidence interval (95\,\% confidence level).
|
||||
|
||||
\begin{align*}
|
||||
{S_N}^2 & = \frac{1}{N-1} \sum_{i=1}^{N} (X_i - \overline{X_N})^2 \\
|
||||
IC_{95\%}(m) & = \left[ \overline{X_N} \pm \frac{S_N}{\sqrt{N}} \cdot t_{1 - \frac{\alpha}{2}, N-1} \right] \\
|
||||
\end{align*}
|
||||
|
||||
|
||||
|
||||
|
||||
\paragraph{2 simulations} We first ran $ R = 2 $ simulations to observe the
|
||||
behavior of the algorithm and the low precision of the results.
|
||||
|
||||
% TODO graph T_i 2 sim
|
||||
|
||||
On this graph, we can see each value of $ T_i $. Our calculations have yielded
|
||||
that $ \overline{T_1} = 1.0 $ and $ {S_N}^2 = 2.7 $. With $ N = 2 $ samples there is $ N - 1 = 1 $ degree of freedom, so our Student coefficient is
|
||||
$ t_{0.975, 1} = 12.706 $.
|
||||
|
||||
\begin{align*}
|
||||
\overline{T_1} = \frac{1}{2} \sum_{k=1}^{2} {T_1}_k & = 1.0 \\
|
||||
IC_{95\%}(T_1) & = \left[ 1.0 \pm \frac{\sqrt{2.7}}{\sqrt{2}} \cdot 12.706 \right] \\
|
||||
& = \left[ 1.0 \pm 14.8 \right] \\
|
||||
\end{align*}
|
||||
|
||||
With two simulations, we obtain $ \overline{T_1} = 1.0 $.
|
||||
|
||||
|
||||
|
||||
IC observed
|
||||
|
||||
We then ran $ R = 10^6 $ simulations with $ N = 50 $ different items each.
|
||||
With $ R = 10^6 $ simulations, we obtain $ \overline{X_N} $ (see the graph).
|
||||
Computation of $ {S_N}^2 $.
|
||||
IC observed
|
||||
|
||||
|
||||
Same for V.
|
||||
|
||||
|
||||
Graph of $ H $.
|
||||
|
||||
\paragraph{Distribution of $ T_i $} We first studied how many items were
|
||||
present per bin.
|
||||
|
||||
% TODO sim of T_i
|
||||
|
||||
We determined the empirical mean to be
|
||||
|
||||
\[
|
||||
\overline{T_i} = \frac{1}{20} \sum_{k=1}^{20} T_k = 1.5 \qquad \forall 1 \leq i \leq 20
|
||||
\]
|
||||
|
||||
|
||||
We can show
|
||||
|
||||
\paragraph{Distribution of $ V_i $} We then looked at the size of the first
|
||||
item in each bin.
|
||||
|
||||
|
|
@ -262,7 +323,7 @@ bin. We have that
|
|||
T_i = k \iff U_1 + U_2 + \ldots + U_{k-1} < 1 \text{ and } U_1 + U_2 + \ldots + U_{k} \geq 1
|
||||
\end{equation}
|
||||
|
||||
Let $ A_k = \{ U_1 + U_2 + \ldots + U_{k-1} < 1 \}$. Hence,
|
||||
Let $ A_k = \{ U_1 + U_2 + \ldots + U_{k} < 1 \}$. Hence,
|
||||
|
||||
\begin{align*}
|
||||
% TODO = k
|
||||
|
|
@ -271,61 +332,33 @@ Let $ A_k = \{ U_1 + U_2 + \ldots + U_{k-1} < 1 \}$. Hence,
|
|||
& = P(A_{k-1}) - P(A_k) \qquad \text{ (as $ A_k \subset A_{k-1} $)} \\
|
||||
\end{align*}
|
||||
|
||||
We will try to show that $ \forall k \geq 2 $, $ P(A_k) = \frac{1}{k!} $. To do
|
||||
We will try to show that $ \forall k \geq 1 $, $ P(A_k) = \frac{1}{k!} $. To do
|
||||
so, we will use induction to prove the following proposition \eqref{eq:induction},
|
||||
$ \forall k \geq 2 $:
|
||||
$ \forall k \geq 1 $:
|
||||
|
||||
\begin{equation}
|
||||
\label{eq:induction}
|
||||
\tag{$ \mathcal{H}_k $}
|
||||
P(U_1 + U_2 + \ldots + U_{k-1} < a) = \frac{a^k}{k!} \qquad \forall a \in [0, 1],
|
||||
P(U_1 + U_2 + \ldots + U_{k} < a) = \frac{a^k}{k!} \qquad \forall a \in [0, 1].
|
||||
\end{equation}
|
||||
|
||||
Let us denote $ S_k = U_1 + U_2 + \ldots + U_{k-1} \qquad \forall k \geq 2 $.
|
||||
Let us denote $ S_k = U_1 + U_2 + \ldots + U_{k} \qquad \forall k \geq 1 $.
|
||||
|
||||
\paragraph{Base cases} $ k = 2 $ : $ P(U_1 < a) = a \neq \frac{a^2}{2}$ supposedly proving $ (\mathcal{H}_2) $.
|
||||
\paragraph{Base case} $ k = 1 $ : $ P(U_1 < a) = a = \frac{a^1}{1!}$, proving $ (\mathcal{H}_1) $.
|
||||
|
||||
$ k = 2 $ : \[ P(U_1 + U_2 < a) = \iint_{\cal{D}} f_{U_1, U_2}(x, y) \cdot (x + y) dxdy \]
|
||||
|
||||
Where $ \mathcal{D} = \{ (x, y) \in [0, 1]^2 \mid x + y < a \} $.
|
||||
|
||||
$ U_1 $ and $ U_2 $ are independent, so
|
||||
\begin{align*}
|
||||
f_{U_1, U_2}(x, y) & = f_{U_1}(x) \cdot f_{U_2}(y) \\
|
||||
& = \begin{cases}
|
||||
1 & \text{if } x \in [0, 1] \text{ and } y \in [0, 1] \\
|
||||
0 & \text{otherwise} \\
|
||||
\end{cases} \\
|
||||
\end{align*}
|
||||
|
||||
Hence,
|
||||
\paragraph{Induction step} Let $ k \geq 2 $. We assume $ (\mathcal{H}_{k-1}) $ is
|
||||
true. We will show that $ (\mathcal{H}_{k}) $ is true.
|
||||
|
||||
\begin{align*}
|
||||
P(U_1 + U_2 < a)
|
||||
& = \iint_{\cal{D}} (x + y)dxdy \\
|
||||
& = \int_{0}^{a} \int_{0}^{a - x} (x + y) dy dx \\
|
||||
& = \int_{0}^{a} \left[ xy + \frac{y^2}{2} \right]_{y=0}^{y=a - x} dx \\
|
||||
& = \int_{0}^{a} \left( ax - x^2 + \frac{a^2}{2} - ax + \frac{x^2}{2} \right) dx \\
|
||||
& = \int_{0}^{a} \left( \frac{a^2}{2} - \frac{x^2}{2} \right) dx \\
|
||||
& = \left[ \frac{a^2 x}{2} - \frac{x^3}{6} \right]_{0}^{a} \\
|
||||
& = \frac{a^3}{2} - \frac{a^3}{6} \\
|
||||
P(S_k < a) & = P(S_{k-1} + U_k < a) \\
|
||||
& = \iint_{\mathcal{D}} f_{S_{k-1}, U_k}(x, y) dxdy \\
|
||||
\text{Where } \mathcal{D} & = \{ (x, y) \in [0, 1]^2 \mid x + y < a \} \\
|
||||
& = \{ (x, y) \in [0, 1]^2 \mid 0 < x < a \text{ and } 0 < y < a - x \} \\
|
||||
P(S_k < a) & = \iint_{\mathcal{D}} f_{S_{k-1}}(x) \cdot f_{U_k}(y) dxdy \qquad
|
||||
\text{because $ S_{k-1} $ and $ U_k $ are independent} \\
|
||||
& = \int_{0}^{a} f_{S_{k-1}}(x) \cdot \left( \int_{0}^{a-x} f_{U_k}(y) dy \right) dx \\
|
||||
\end{align*}
|
||||
|
||||
|
||||
\paragraph{Induction step} For a fixed $ k > 2 $, we assume that $
|
||||
(\mathcal{H}_{k-1}) $ is true. We will try to prove $ (\mathcal{H}_{k}) $.
|
||||
|
||||
\[
|
||||
P(S_{k-1} + U_{k-1} < a)
|
||||
= \iint_{\cal{D}} f_{S_{k-1}, U_{k-1}}(x, y) \cdot (x + y) dxdy \\
|
||||
\]
|
||||
where $ \mathcal{D} = \{ (x, y) \in [0, 1]^2 \mid x + y < a \} $.
|
||||
As $ S_{k-1} $ and $ U_{k-1} $ are independent,
|
||||
\[
|
||||
P(S_{k-1} + U_{k-1} < a)
|
||||
= \iint_{\cal{D}} f_{S_{k-1}}(x) \cdot f_{U_{k-1}}(y) \cdot (x + y) dxdy \qquad \\
|
||||
\]
|
||||
|
||||
$ (\mathcal{H}_{k-1}) $ gives us that $ \forall x \in [0, 1] $,
|
||||
$ F_{S_{k-1}}(x) = P(S_{k-1} < x) = \frac{x^{k-1}}{(k-1)!} $.
|
||||
|
||||
|
|
@ -336,15 +369,25 @@ By differentiating, we get that $ \forall x \in [0, 1] $,
|
|||
\]
|
||||
|
||||
Furthermore, $ U_{k} $ is uniformly distributed on $ [0, 1] $, so
|
||||
$ f_{U_{k}}(y) = 1 $ for $ y \in [0, 1] $.
|
||||
$ f_{U_{k}}(y) = 1 $ for $ y \in [0, 1] $. We can then integrate by parts:
|
||||
|
||||
\begin{align*}
|
||||
\text{Hence, }
|
||||
P(S_{k-1} + U_{k-1} < a)
|
||||
& =
|
||||
& = \frac{a^{k}}{k!}
|
||||
P(S_k < a)
|
||||
& = \int_{0}^{a} f_{S_{k-1}}(x) \cdot \left( \int_{0}^{a-x} 1 dy \right) dx \\
|
||||
& = \int_{0}^{a} f_{S_{k-1}}(x) \cdot (a - x) dx \\
|
||||
& = a \int_{0}^{a} f_{S_{k-1}}(x) dx - \int_{0}^{a} x f_{S_{k-1}}(x) dx \\
|
||||
& = a \int_0^a F'_{S_{k-1}}(x) dx - \left[ x F_{S_{k-1}}(x) \right]_0^a
|
||||
+ \int_{0}^{a} F_{S_{k-1}}(x) dx \qquad \text{(integration by parts)} \\
|
||||
& = a \left[ F_{S_{k-1}}(x) \right]_0^a - \left[ x F_{S_{k-1}}(x) \right]_0^a
|
||||
+ \int_{0}^{a} \frac{x^{k-1}}{(k-1)!} dx \\
|
||||
& = \left[ \frac{x^k}{k!} \right]_0^a \\
|
||||
& = \frac{a^k}{k!} \\
|
||||
\end{align*}
|
||||
|
||||
We have shown that $ (\mathcal{H}_{k}) $ is true, so by induction, $ \forall k \geq 1 $,
|
||||
$ \forall a \in [0, 1] $, $ P(U_1 + U_2 + \ldots + U_{k} < a) = \frac{a^k}{k!} $. Take
|
||||
$ a = 1 $ to get $ P(U_1 + U_2 + \ldots + U_{k} < 1) = \frac{1}{k!} $.
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue