This commit is contained in:
Paul ALNET 2023-06-04 23:28:52 +02:00
commit 1f279bf2b9
7 changed files with 165 additions and 69 deletions

View file

@ -420,7 +420,7 @@ def basic_demo():
) )
stats_NFBP_iter(2 *10**0, 50) stats_NFBP_iter(10**5, 50)
print("\n\n") print("\n\n")
stats_NFDBP(10**3, 10, 1) stats_NFDBP(10**3, 10, 1)

View file

@ -180,28 +180,107 @@ Mathematically, the NFBP algorithm imposes the following constraint on the first
We implemented the NFBP algorithm in Python \footnotemark, for its ease of use We implemented the NFBP algorithm in Python \footnotemark, for its ease of use
and broad recommendation. We used the \texttt{random} library to generate and broad recommendation. We used the \texttt{random} library to generate
random numbers between $ 0 $ and $ 1 $ and \texttt{matplotlib} to plot the random numbers between $ 0 $ and $ 1 $ and \texttt{matplotlib} to plot the
results in the form of histograms. We ran $ R = 10^6 $ simulations with results in the form of histograms.
$ N = 10 $ different items each.
\footnotetext{The code is available in Annex \ref{annex:probabilistic}} \footnotetext{The code is available in Annex \ref{annex:probabilistic}}
\paragraph{Distribution of $ T_i $} We first studied how many items were We will try to approximate $ \mathbb{E}[R] $ and $ \mathbb{E}[V] $ with $
present per bin. \overline{X_N} $ using $ {S_n}^2 $. This operation will be done for both $ R =
2 $ and $ R = 10^6 $ simulations.
\paragraph{Distribution of $ V_i $} We then looked at the size of the first \[
item in each bin. \overline{X_N} = \frac{1}{N} \sum_{i=1}^{N} X_i
\]
As the variance value is unknown, we will use $ {S_n}^2 $ to estimate the
variance and further determine the Confidence Interval (95 \% certainty).
\begin{align*}
{S_N}^2 & = \frac{1}{N-1} \sum_{i=1}^{N} (X_i - \overline{X_N})^2 \\
IC_{95\%}(m) & = \left[ \overline{X_N} \pm \frac{S_N}{\sqrt{N}} \cdot t_{1 - \frac{\alpha}{2}, N-1} \right] \\
\end{align*}
\paragraph{2 simulations} We first ran $ R = 2 $ simulations to observe the
behavior of the algorithm and the low precision of the results.
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{graphics/graphic-NFBP-Ti-2-sim}
\caption{Histogram of $ T_i $ for $ R = 2 $ simulations and $ N = 50 $ items (number of items per bin)}
\label{fig:graphic-NFBP-Ti-2-sim}
\end{figure}
On this graph (figure \ref{fig:graphic-NFBP-Ti-2-sim}), we can see each value
of $ T_i $. Our calculations have yielded that $ \overline{T_1} = 1.0 $ and $
{S_N}^2 = 2.7 $. Our Student coefficient is $ t_{0.95, 2} = 4.303 $.
We can now calculate the Confidence Interval for $ T_1 $ for $ R = 2 $ simulations :
\begin{align*}
IC_{95\%}(T_1) & = \left[ 1.0 \pm \frac{\sqrt{2.7}}{\sqrt{2}} \cdot 4.303 \right] \\
& = \left[ 1.0 \pm 5.0 \right] \\
\end{align*}
We can see that the Confidence Interval is very large, which is due to the low
number of simulations. Looking at figure \ref{fig:graphic-NFBP-Ti-2-sim}, we
easily notice the high variance.
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{graphics/graphic-NFBP-Vi-2-sim}
\caption{Histogram of $ V_i $ for $ R = 2 $ simulations and $ N = 50 $ items (size of the first item in a bin)}
\label{fig:graphic-NFBP-Vi-2-sim}
\end{figure}
On the graph of $ V_i $ (figure \ref{fig:graphic-NFBP-Vi-2-sim}), we can see
that the sizes are scattered pseudo-randomly between $ 0 $ and $ 1 $, which is
unsurprising given the low number of simulations. The process determining the statistics
is the same as for $ T_i $, yielding $ \overline{V_1} = 0.897 $, $ {S_N}^2 =
0.2 $ and $ IC_{95\%}(V_1) = \left[ 0.897 \pm 1.3 \right] $. In this particular run,
the two values for $ V_1 $ are high (being bounded between $ 0 $ and $ 1 $).
\paragraph{100 000 simulations} In order to ensure better precision, we then
ran $ R = 10^5 $ simulations with $ N = 50 $ different items each.
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{graphics/graphic-NFBP-Ti-105-sim}
\caption{Histogram of $ T_i $ for $ R = 10^5 $ simulations and $ N = 50 $ items (number of items per bin)}
\label{fig:graphic-NFBP-Ti-105-sim}
\end{figure}
On this graph (figure \ref{fig:graphic-NFBP-Ti-105-sim}), we can see each value
of $ T_i $. Our calculations have yielded that $ \overline{T_1} = 1.72 $ and $
{S_N}^2 = 0.88 $. Our Student coefficient is $ t_{0.95, 10^5 - 1} \approx 2 $.
We can now calculate the Confidence Interval for $ T_1 $ for $ R = 10^5 $ simulations :
\begin{align*}
IC_{95\%}(T_1) & = \left[ 1.72 \pm \frac{\sqrt{0.88}}{\sqrt{10^5}} \cdot 2 \right] \\
& = \left[ 1.72 \pm 0.006 \right] \\
\end{align*}
We can see that the Confidence Interval is very small, thanks to the large number of iterations.
This results in a steady curve in figure \ref{fig:graphic-NFBP-Ti-105-sim}.
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{graphics/graphic-NFBP-Vi-105-sim}
\caption{Histogram of $ V_i $ for $ R = 10^5 $ simulations and $ N = 50 $ items (size of the first item in a bin)}
\label{fig:graphic-NFBP-Vi-105-sim}
\end{figure}
\paragraph{Asymptotic behavior of $ H_n $} Finally, we analyzed how many bins \paragraph{Asymptotic behavior of $ H_n $} Finally, we analyzed how many bins
were needed to store $ n $ items. were needed to store $ n $ items. We used the numbers from the $ R = 10^5 $ simulations.
% TODO histograms
% TODO analysis histograms
\cite{hofri:1987}
% TODO mettre de l'Histoire
\section{Next Fit Dual Bin Packing algorithm (NFDBP)} \section{Next Fit Dual Bin Packing algorithm (NFDBP)}
@ -251,81 +330,53 @@ new constraints on the first bin can be expressed as follows :
\subsection{La giga demo} \subsection{La giga demo}
Let $ k \in \mathbb{N} $. Let $ (U_n)_{n \in \mathbb{N}} $ be a sequence of Let $ k \geq 2 $. Let $ (U_n)_{n \in \mathbb{N}^*} $ be a sequence of
independent random variables with uniform distribution on $ [0, 1] $, representing independent random variables with uniform distribution on $ [0, 1] $, representing
the size of the $ n $-th item. the size of the $ n $-th item.
Let $ i \in \mathbb{N} $. $ T_i $ denotes the number of items in the $ i $-th Let $ i \in \mathbb{N} $. $ T_i $ denotes the number of items in the $ i $-th
bin. We have that bin. We have that
\begin{equation} \begin{equation*}
T_i = k \iff U_1 + U_2 + \ldots + U_{k-1} < 1 \text{ and } U_1 + U_2 + \ldots + U_{k} \geq 1 T_i = k \iff U_1 + U_2 + \ldots + U_{k-1} < 1 \text{ and } U_1 + U_2 + \ldots + U_{k} \geq 1
\end{equation} \end{equation*}
Let $ A_k = \{ U_1 + U_2 + \ldots + U_{k-1} < 1 \}$. Hence, Let $ A_k = \{ U_1 + U_2 + \ldots + U_{k} < 1 \}$. Hence,
\begin{align*} \begin{align}
% TODO = k \label{eq:prob}
P(T_i = k) P(T_i = k)
& = P(A_{k-1} \cap A_k^c) \\ & = P(A_{k-1} \cap A_k^c) \\
& = P(A_{k-1}) - P(A_k) \qquad \text{ (as $ A_k \subset A_{k-1} $)} \\ & = P(A_{k-1}) - P(A_k) \qquad \text{ (as $ A_k \subset A_{k-1} $)} \\
\end{align*} \end{align}
We will try to show that $ \forall k \geq 2 $, $ P(A_k) = \frac{1}{k!} $. To do We will try to show that $ \forall k \geq 1 $, $ P(A_k) = \frac{1}{k!} $. To do
so, we will use induction to prove the following proposition \eqref{eq:induction}, so, we will use induction to prove the following proposition \eqref{eq:induction},
$ \forall k \geq 2 $: $ \forall k \geq 1 $:
\begin{equation} \begin{equation}
\label{eq:induction} \label{eq:induction}
\tag{$ \mathcal{H}_k $} \tag{$ \mathcal{H}_k $}
P(U_1 + U_2 + \ldots + U_{k-1} < a) = \frac{a^k}{k!} \qquad \forall a \in [0, 1], P(U_1 + U_2 + \ldots + U_{k} < a) = \frac{a^k}{k!} \qquad \forall a \in [0, 1],
\end{equation} \end{equation}
Let us denote $ S_k = U_1 + U_2 + \ldots + U_{k-1} \qquad \forall k \geq 2 $. Let us denote $ S_k = U_1 + U_2 + \ldots + U_{k} \qquad \forall k \geq 1 $.
\paragraph{Base cases} $ k = 2 $ : $ P(U_1 < a) = a \neq \frac{a^2}{2}$ supposedly proving $ (\mathcal{H}_2) $. \paragraph{Base case} $ k = 1 $ : $ P(U_1 < a) = a = \frac{a^1}{1!}$, proving $ (\mathcal{H}_1) $.
$ k = 2 $ : \[ P(U_1 + U_2 < a) = \iint_{\cal{D}} f_{U_1, U_2}(x, y) \cdot (x + y) dxdy \] \paragraph{Induction step} Let $ k \geq 2 $. We assume $ (\mathcal{H}_{k-1}) $ is
true. We will show that $ (\mathcal{H}_{k}) $ is true.
Where $ \mathcal{D} = \{ (x, y) \in [0, 1]^2 \mid x + y < a \} $.
$ U_1 $ and $ U_2 $ are independent, so
\begin{align*}
f_{U_1, U_2}(x, y) & = f_{U_1}(x) \cdot f_{U_2}(y) \\
& = \begin{cases}
1 & \text{if } x \in [0, 1] \text{ and } y \in [0, 1] \\
0 & \text{otherwise} \\
\end{cases} \\
\end{align*}
Hence,
\begin{align*} \begin{align*}
P(U_1 + U_2 < a) P(S_k < a) & = P(S_{k-1} + U_k < a) \\
& = \iint_{\cal{D}} (x + y)dxdy \\ & = \iint_{\cal{D}} f_{S_{k-1}, U_k}(x, y) dxdy \\
& = \int_{0}^{a} \int_{0}^{a - x} (x + y) dy dx \\ \text{Where } \mathcal{D} & = \{ (x, y) \in [0, 1]^2 \mid x + y < a \} \\
& = \int_{0}^{a} \left[ xy + \frac{y^2}{2} \right]_{y=0}^{y=a - x} dx \\ & = \{ (x, y) \in [0, 1]^2 \mid 0 < x < a \text{ and } 0 < y < a - x \} \\
& = \int_{0}^{a} \left( ax - x^2 + \frac{a^2}{2} - ax + \frac{x^2}{2} \right) dx \\ P(S_k < a) & = \iint_{\cal{D}} f_{S_{k-1}}(x) \cdot f_{U_k}(y) dxdy \qquad
& = \int_{0}^{a} \left( \frac{a^2}{2} - \frac{x^2}{2} \right) dx \\ \text{because $ S_{k-1} $ and $ U_k $ are independent} \\
& = \left[ \frac{a^2 x}{2} - \frac{x^3}{6} \right]_{0}^{a} \\ & = \int_{0}^{a} f_{S_{k-1}}(x) \cdot \left( \int_{0}^{a-x} f_{U_k}(y) dy \right) dx \\
& = \frac{a^3}{2} - \frac{a^3}{6} \\
\end{align*} \end{align*}
\paragraph{Induction step} For a fixed $ k > 2 $, we assume that $
(\mathcal{H}_{k-1}) $ is true. We will try to prove $ (\mathcal{H}_{k}) $.
\[
P(S_{k-1} + U_{k-1} < a)
= \iint_{\cal{D}} f_{S_{k-1}, U_{k-1}}(x, y) \cdot (x + y) dxdy \\
\]
where $ \mathcal{D} = \{ (x, y) \in [0, 1]^2 \mid x + y < a \} $.
As $ S_{k-1} $ and $ U_{k-1} $ are independent,
\[
P(S_{k-1} + U_{k-1} < a)
= \iint_{\cal{D}} f_{S_{k-1}}(x) \cdot f_{U_{k-1}}(y) \cdot (x + y) dxdy \qquad \\
\]
$ (\mathcal{H}_{k-1}) $ gives us that $ \forall x \in [0, 1] $, $ (\mathcal{H}_{k-1}) $ gives us that $ \forall x \in [0, 1] $,
$ F_{S_{k-1}}(x) = P(S_{k-1} < x) = \frac{x^{k-1}}{(k-1)!} $. $ F_{S_{k-1}}(x) = P(S_{k-1} < x) = \frac{x^{k-1}}{(k-1)!} $.
@ -336,17 +387,59 @@ By differentiating, we get that $ \forall x \in [0, 1] $,
\] \]
Furthermore, $ U_{k-1} $ is uniformly distributed on $ [0, 1] $, so Furthermore, $ U_{k-1} $ is uniformly distributed on $ [0, 1] $, so
$ f_{U_{k-1}}(y) = 1 $. $ f_{U_{k-1}}(y) = 1 $. We can then integrate by parts :
\begin{align*} \begin{align*}
\text{Hence, } P(S_k < a)
P(S_{k-1} + U_{k-1} < a) & = \int_{0}^{a} f_{S_{k-1}}(x) \cdot \left( \int_{0}^{a-x} 1 dy \right) dx \\
& = & = \int_{0}^{a} f_{S_{k-1}}(x) \cdot (a - x) dx \\
& = \frac{a^{k}}{k!} & = a \int_{0}^{a} f_{S_{k-1}}(x) dx - \int_{0}^{a} x f_{S_{k-1}}(x) dx \\
& = a \int_0^a F'_{S_{k-1}}(x) dx - \left[ x F_{S_{k-1}}(x) \right]_0^a
+ \int_{0}^{a} F_{S_{k-1}}(x) dx \qquad \text{(IPP: } x, F_{S_{k-1}} \in C^1([0,1]) \text{)} \\
& = a \left[ F_{S_{k-1}}(x) \right]_0^a - \left[ x F_{S_{k-1}}(x) \right]_0^a
+ \int_{0}^{a} \frac{x^{k-1}}{(k-1)!} dx \\
& = \left[ \frac{x^k}{k!} \right]_0^a \\
& = \frac{a^k}{k!} \\
\end{align*} \end{align*}
\paragraph{Conclusion} We have shown that $ (\mathcal{H}_{k}) $ is true, so by induction, $ \forall k \geq 1 $,
$ \forall a \in [0, 1] $, $ P(U_1 + U_2 + \ldots + U_{k} < a) = \frac{a^k}{k!} $. Take
$ a = 1 $ to get
\[ P(U_1 + U_2 + \ldots + U_{k} < 1) = \frac{1}{k!} \]
Finally, plugging this into \eqref{eq:prob} gives us
\[
P(T_i = k) = P(A_{k-1}) - P(A_{k}) = \frac{1}{(k-1)!} - \frac{1}{k!} \qquad \forall k \geq 2
\]
\subsection{Expected value of $ T_i $}
We now compute the expected value $ \mu $ and variance $ \sigma^2 $ of $ T_i $.
\begin{align*}
\mu = E(T_i) & = \sum_{k=2}^{\infty} k \cdot P(T_i = k) \\
& = \sum_{k=2}^{\infty} (\frac{k}{(k-1)!} - \frac{1}{(k-1)!}) \\
& = \sum_{k=2}^{\infty} \frac{k-1}{(k-1)!} \\
& = \sum_{k=0}^{\infty} \frac{1}{k!} \\
& = e \\
\end{align*}
\begin{align*}
E({T_i}^2) & = \sum_{k=2}^{\infty} k^2 \cdot P(T_i = k) \\
& = \sum_{k=2}^{\infty} (\frac{k^2}{(k-1)!} - \frac{k}{(k-1)!}) \\
& = \sum_{k=2}^{\infty} \frac{(k-1)k}{(k-1)!} \\
& = \sum_{k=2}^{\infty} \frac{k}{(k-2)!} \\
& = \sum_{k=0}^{\infty} \frac{k+2}{k!} \\
& = \sum_{k=0}^{\infty} \left( \frac{1}{(k-1)!} + \frac{2}{k!} \right) \qquad \text{(with the convention $ \frac{1}{(-1)!} = 0 $)} \\
& = \sum_{j=0}^{\infty} \frac{1}{j!} + 2 \sum_{k=0}^{\infty} \frac{1}{k!} \\
& = e + 2e = 3e
\end{align*}
\begin{align*}
\sigma^2 = E({T_i}^2) - E(T_i)^2 = 3e - e^2
\end{align*}
\section{Complexity and implementation optimization} \section{Complexity and implementation optimization}
@ -430,7 +523,10 @@ then calculate the statistics (which iterates multiple times over the array).
\subsection{Optimal algorithm} \subsection{Optimal algorithm}
\cite{bin-packing-approximation:2022}
\sectionnn{Conclusion} \sectionnn{Conclusion}
\nocite{bin-packing-approximation:2022}
\nocite{hofri:1987}

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

View file

@ -20,7 +20,7 @@
%\begin{center} %\begin{center}
% -\hspace{0.25cm}Version du \today\hspace{0.25cm}- % -\hspace{0.25cm}Version du \today\hspace{0.25cm}-
%\end{center} %\end{center}
Defense on June Xth 2023 % TODO Defense on June 7th 2023
} }
\def\varinsaaddress{ \def\varinsaaddress{