This commit is contained in:
Paul ALNET 2023-06-04 23:28:52 +02:00
commit 1f279bf2b9
7 changed files with 165 additions and 69 deletions

View file

@ -420,7 +420,7 @@ def basic_demo():
) )
stats_NFBP_iter(2 *10**0, 50) stats_NFBP_iter(10**5, 50)
print("\n\n") print("\n\n")
stats_NFDBP(10**3, 10, 1) stats_NFDBP(10**3, 10, 1)

View file

@ -180,28 +180,107 @@ Mathematically, the NFBP algorithm imposes the following constraint on the first
We implemented the NFBP algorithm in Python \footnotemark, for its ease of use We implemented the NFBP algorithm in Python \footnotemark, for its ease of use
and broad recommendation. We used the \texttt{random} library to generate and broad recommendation. We used the \texttt{random} library to generate
random numbers between $ 0 $ and $ 1 $ and \texttt{matplotlib} to plot the random numbers between $ 0 $ and $ 1 $ and \texttt{matplotlib} to plot the
results in the form of histograms. We ran $ R = 10^6 $ simulations with results in the form of histograms.
$ N = 10 $ different items each.
\footnotetext{The code is available in Annex \ref{annex:probabilistic}} \footnotetext{The code is available in Annex \ref{annex:probabilistic}}
\paragraph{Distribution of $ T_i $} We first studied how many items were We will try to approximate $ \mathbb{E}[R] $ and $ \mathbb{E}[V] $ with $
present per bin. \overline{X_N} $ using $ {S_n}^2 $. This operation will be done for both $ R =
2 $ and $ R = 10^6 $ simulations.
\paragraph{Distribution of $ V_i $} We then looked at the size of the first \[
item in each bin. \overline{X_N} = \frac{1}{N} \sum_{i=1}^{N} X_i
\]
As the variance value is unknown, we will use $ {S_n}^2 $ to estimate the
variance and further determine the Confidence Interval (95 \% certainty).
\begin{align*}
{S_N}^2 & = \frac{1}{N-1} \sum_{i=1}^{N} (X_i - \overline{X_N})^2 \\
IC_{95\%}(m) & = \left[ \overline{X_N} \pm \frac{S_N}{\sqrt{N}} \cdot t_{1 - \frac{\alpha}{2}, N-1} \right] \\
\end{align*}
\paragraph{2 simulations} We first ran $ R = 2 $ simulations to observe the
behavior of the algorithm and the low precision of the results.
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{graphics/graphic-NFBP-Ti-2-sim}
\caption{Histogram of $ T_i $ for $ R = 2 $ simulations and $ N = 50 $ items (number of items per bin)}
\label{fig:graphic-NFBP-Ti-2-sim}
\end{figure}
On this graph (figure \ref{fig:graphic-NFBP-Ti-2-sim}), we can see each value
of $ T_i $. Our calculations have yielded that $ \overline{T_1} = 1.0 $ and $
{S_N}^2 = 2.7 $. Our Student coefficient is $ t_{0.95, 2} = 4.303 $.
We can now calculate the Confidence Interval for $ T_1 $ for $ R = 2 $ simulations :
\begin{align*}
IC_{95\%}(T_1) & = \left[ 1.0 \pm \frac{\sqrt{2.7}}{\sqrt{2}} \cdot 4.303 \right] \\
& = \left[ 1.0 \pm 5.0 \right] \\
\end{align*}
We can see that the Confidence Interval is very large, which is due to the low
number of simulations. Looking at figure \ref{fig:graphic-NFBP-Ti-2-sim}, we
easily notice the high variance.
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{graphics/graphic-NFBP-Vi-2-sim}
\caption{Histogram of $ V_i $ for $ R = 2 $ simulations and $ N = 50 $ items (size of the first item in a bin)}
\label{fig:graphic-NFBP-Vi-2-sim}
\end{figure}
On the graph of $ V_i $ (figure \ref{fig:graphic-NFBP-Vi-2-sim}), we can see
that the sizes are scattered pseudo-randomly between $ 0 $ and $ 1 $, which is
unsurprising given the low number of simulations. The process determining the statistics
is the same as for $ T_i $, yielding $ \overline{V_1} = 0.897 $, $ {S_N}^2 =
0.2 $ and $ IC_{95\%}(V_1) = \left[ 0.897 \pm 1.3 \right] $. In this particular run,
the two values for $ V_1 $ are high (being bounded between $ 0 $ and $ 1 $).
\paragraph{100 000 simulations} In order to ensure better precision, we then
ran $ R = 10^5 $ simulations with $ N = 50 $ different items each.
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{graphics/graphic-NFBP-Ti-105-sim}
\caption{Histogram of $ T_i $ for $ R = 10^5 $ simulations and $ N = 50 $ items (number of items per bin)}
\label{fig:graphic-NFBP-Ti-105-sim}
\end{figure}
On this graph (figure \ref{fig:graphic-NFBP-Ti-105-sim}), we can see each value
of $ T_i $. Our calculations have yielded that $ \overline{T_1} = 1.72 $ and $
{S_N}^2 = 0.88 $. Our Student coefficient is $ t_{0.95, 10^5 - 1} \approx 2 $.
We can now calculate the Confidence Interval for $ T_1 $ for $ R = 10^5 $ simulations :
\begin{align*}
IC_{95\%}(T_1) & = \left[ 1.72 \pm \frac{\sqrt{0.88}}{\sqrt{10^5}} \cdot 2 \right] \\
& = \left[ 1.72 \pm 0.006 \right] \\
\end{align*}
We can see that the Confidence Interval is very small, thanks to the large number of iterations.
This results in a steady curve in figure \ref{fig:graphic-NFBP-Ti-105-sim}.
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{graphics/graphic-NFBP-Vi-105-sim}
\caption{Histogram of $ V_i $ for $ R = 10^5 $ simulations and $ N = 50 $ items (size of the first item in a bin)}
\label{fig:graphic-NFBP-Vi-105-sim}
\end{figure}
\paragraph{Asymptotic behavior of $ H_n $} Finally, we analyzed how many bins \paragraph{Asymptotic behavior of $ H_n $} Finally, we analyzed how many bins
were needed to store $ n $ items. were needed to store $ n $ items. We used the numbers from the $ R = 10^5 $ simulations.
% TODO histograms
% TODO analysis histograms
\cite{hofri:1987}
% TODO mettre de l'Histoire
\section{Next Fit Dual Bin Packing algorithm (NFDBP)} \section{Next Fit Dual Bin Packing algorithm (NFDBP)}
@ -251,81 +330,53 @@ new constraints on the first bin can be expressed as follows :
\subsection{La giga demo} \subsection{La giga demo}
Let $ k \in \mathbb{N} $. Let $ (U_n)_{n \in \mathbb{N}} $ be a sequence of Let $ k \geq 2 $. Let $ (U_n)_{n \in \mathbb{N}^*} $ be a sequence of
independent random variables with uniform distribution on $ [0, 1] $, representing independent random variables with uniform distribution on $ [0, 1] $, representing
the size of the $ n $-th item. the size of the $ n $-th item.
Let $ i \in \mathbb{N} $. $ T_i $ denotes the number of items in the $ i $-th Let $ i \in \mathbb{N} $. $ T_i $ denotes the number of items in the $ i $-th
bin. We have that bin. We have that
\begin{equation} \begin{equation*}
T_i = k \iff U_1 + U_2 + \ldots + U_{k-1} < 1 \text{ and } U_1 + U_2 + \ldots + U_{k} \geq 1 T_i = k \iff U_1 + U_2 + \ldots + U_{k-1} < 1 \text{ and } U_1 + U_2 + \ldots + U_{k} \geq 1
\end{equation} \end{equation*}
Let $ A_k = \{ U_1 + U_2 + \ldots + U_{k-1} < 1 \}$. Hence, Let $ A_k = \{ U_1 + U_2 + \ldots + U_{k} < 1 \}$. Hence,
\begin{align*} \begin{align}
% TODO = k \label{eq:prob}
P(T_i = k) P(T_i = k)
& = P(A_{k-1} \cap A_k^c) \\ & = P(A_{k-1} \cap A_k^c) \\
& = P(A_{k-1}) - P(A_k) \qquad \text{ (as $ A_k \subset A_{k-1} $)} \\ & = P(A_{k-1}) - P(A_k) \qquad \text{ (as $ A_k \subset A_{k-1} $)} \\
\end{align*} \end{align}
We will try to show that $ \forall k \geq 2 $, $ P(A_k) = \frac{1}{k!} $. To do We will try to show that $ \forall k \geq 1 $, $ P(A_k) = \frac{1}{k!} $. To do
so, we will use induction to prove the following proposition \eqref{eq:induction}, so, we will use induction to prove the following proposition \eqref{eq:induction},
$ \forall k \geq 2 $: $ \forall k \geq 1 $:
\begin{equation} \begin{equation}
\label{eq:induction} \label{eq:induction}
\tag{$ \mathcal{H}_k $} \tag{$ \mathcal{H}_k $}
P(U_1 + U_2 + \ldots + U_{k-1} < a) = \frac{a^k}{k!} \qquad \forall a \in [0, 1], P(U_1 + U_2 + \ldots + U_{k} < a) = \frac{a^k}{k!} \qquad \forall a \in [0, 1],
\end{equation} \end{equation}
Let us denote $ S_k = U_1 + U_2 + \ldots + U_{k-1} \qquad \forall k \geq 2 $. Let us denote $ S_k = U_1 + U_2 + \ldots + U_{k} \qquad \forall k \geq 1 $.
\paragraph{Base cases} $ k = 2 $ : $ P(U_1 < a) = a \neq \frac{a^2}{2}$ supposedly proving $ (\mathcal{H}_2) $. \paragraph{Base case} $ k = 1 $ : $ P(U_1 < a) = a = \frac{a^1}{1!}$, proving $ (\mathcal{H}_1) $.
$ k = 2 $ : \[ P(U_1 + U_2 < a) = \iint_{\cal{D}} f_{U_1, U_2}(x, y) \cdot (x + y) dxdy \] \paragraph{Induction step} Let $ k \geq 2 $. We assume $ (\mathcal{H}_{k-1}) $ is
true. We will show that $ (\mathcal{H}_{k}) $ is true.
Where $ \mathcal{D} = \{ (x, y) \in [0, 1]^2 \mid x + y < a \} $.
$ U_1 $ and $ U_2 $ are independent, so
\begin{align*}
f_{U_1, U_2}(x, y) & = f_{U_1}(x) \cdot f_{U_2}(y) \\
& = \begin{cases}
1 & \text{if } x \in [0, 1] \text{ and } y \in [0, 1] \\
0 & \text{otherwise} \\
\end{cases} \\
\end{align*}
Hence,
\begin{align*} \begin{align*}
P(U_1 + U_2 < a) P(S_k < a) & = P(S_{k-1} + U_k < a) \\
& = \iint_{\cal{D}} (x + y)dxdy \\ & = \iint_{\cal{D}} f_{S_{k-1}, U_k}(x, y) dxdy \\
& = \int_{0}^{a} \int_{0}^{a - x} (x + y) dy dx \\ \text{Where } \mathcal{D} & = \{ (x, y) \in [0, 1]^2 \mid x + y < a \} \\
& = \int_{0}^{a} \left[ xy + \frac{y^2}{2} \right]_{y=0}^{y=a - x} dx \\ & = \{ (x, y) \in [0, 1]^2 \mid 0 < x < a \text{ and } 0 < y < a - x \} \\
& = \int_{0}^{a} \left( ax - x^2 + \frac{a^2}{2} - ax + \frac{x^2}{2} \right) dx \\ P(S_k < a) & = \iint_{\cal{D}} f_{S_{k-1}}(x) \cdot f_{U_k}(y) dxdy \qquad
& = \int_{0}^{a} \left( \frac{a^2}{2} - \frac{x^2}{2} \right) dx \\ \text{because $ S_{k-1} $ and $ U_k $ are independent} \\
& = \left[ \frac{a^2 x}{2} - \frac{x^3}{6} \right]_{0}^{a} \\ & = \int_{0}^{a} f_{S_{k-1}}(x) \cdot \left( \int_{0}^{a-x} f_{U_k}(y) dy \right) dx \\
& = \frac{a^3}{2} - \frac{a^3}{6} \\
\end{align*} \end{align*}
\paragraph{Induction step} For a fixed $ k > 2 $, we assume that $
(\mathcal{H}_{k-1}) $ is true. We will try to prove $ (\mathcal{H}_{k}) $.
\[
P(S_{k-1} + U_{k-1} < a)
= \iint_{\cal{D}} f_{S_{k-1}, U_{k-1}}(x, y) \cdot (x + y) dxdy \\
\]
where $ \mathcal{D} = \{ (x, y) \in [0, 1]^2 \mid x + y < a \} $.
As $ S_{k-1} $ and $ U_{k-1} $ are independent,
\[
P(S_{k-1} + U_{k-1} < a)
= \iint_{\cal{D}} f_{S_{k-1}}(x) \cdot f_{U_{k-1}}(y) \cdot (x + y) dxdy \qquad \\
\]
$ (\mathcal{H}_{k-1}) $ gives us that $ \forall x \in [0, 1] $, $ (\mathcal{H}_{k-1}) $ gives us that $ \forall x \in [0, 1] $,
$ F_{S_{k-1}}(x) = P(S_{k-1} < x) = \frac{x^{k-1}}{(k-1)!} $. $ F_{S_{k-1}}(x) = P(S_{k-1} < x) = \frac{x^{k-1}}{(k-1)!} $.
@ -336,17 +387,59 @@ By differentiating, we get that $ \forall x \in [0, 1] $,
\] \]
Furthermore, $ U_{k-1} $ is uniformly distributed on $ [0, 1] $, so Furthermore, $ U_{k-1} $ is uniformly distributed on $ [0, 1] $, so
$ f_{U_{k-1}}(y) = 1 $. $ f_{U_{k-1}}(y) = 1 $. We can then integrate by parts :
\begin{align*} \begin{align*}
\text{Hence, } P(S_k < a)
P(S_{k-1} + U_{k-1} < a) & = \int_{0}^{a} f_{S_{k-1}}(x) \cdot \left( \int_{0}^{a-x} 1 dy \right) dx \\
& = & = \int_{0}^{a} f_{S_{k-1}}(x) \cdot (a - x) dx \\
& = \frac{a^{k}}{k!} & = a \int_{0}^{a} f_{S_{k-1}}(x) dx - \int_{0}^{a} x f_{S_{k-1}}(x) dx \\
& = a \int_0^a F'_{S_{k-1}}(x) dx - \left[ x F_{S_{k-1}}(x) \right]_0^a
+ \int_{0}^{a} F_{S_{k-1}}(x) dx \qquad \text{(IPP: } x, F_{S_{k-1}} \in C^1([0,1]) \text{)} \\
& = a \left[ F_{S_{k-1}}(x) \right]_0^a - \left[ x F_{S_{k-1}}(x) \right]_0^a
+ \int_{0}^{a} \frac{x^{k-1}}{(k-1)!} dx \\
& = \left[ \frac{x^k}{k!} \right]_0^a \\
& = \frac{a^k}{k!} \\
\end{align*} \end{align*}
\paragraph{Conclusion} We have shown that $ (\mathcal{H}_{k}) $ is true, so by induction, $ \forall k \geq 1 $,
$ \forall a \in [0, 1] $, $ P(U_1 + U_2 + \ldots + U_{k} < a) = \frac{a^k}{k!} $. Take
$ a = 1 $ to get
\[ P(U_1 + U_2 + \ldots + U_{k} < 1) = \frac{1}{k!} \]
Finally, plugging this into \eqref{eq:prob} gives us
\[
P(T_i = k) = P(A_{k-1}) - P(A_{k}) = \frac{1}{(k-1)!} - \frac{1}{k!} \qquad \forall k \geq 2
\]
\subsection{Expected value of $ T_i $}
We now compute the expected value $ \mu $ and variance $ \sigma^2 $ of $ T_i $.
\begin{align*}
\mu = E(T_i) & = \sum_{k=2}^{\infty} k \cdot P(T_i = k) \\
& = \sum_{k=2}^{\infty} (\frac{k}{(k-1)!} - \frac{1}{(k-1)!}) \\
& = \sum_{k=2}^{\infty} \frac{k-1}{(k-1)!} \\
& = \sum_{k=0}^{\infty} \frac{1}{k!} \\
& = e \\
\end{align*}
\begin{align*}
E({T_i}^2) & = \sum_{k=2}^{\infty} k^2 \cdot P(T_i = k) \\
& = \sum_{k=2}^{\infty} (\frac{k^2}{(k-1)!} - \frac{k}{(k-1)!}) \\
& = \sum_{k=2}^{\infty} \frac{(k-1)k}{(k-1)!} \\
& = \sum_{k=2}^{\infty} \frac{k}{(k-2)!} \\
& = \sum_{k=0}^{\infty} \frac{k+2}{k!} \\
& = \sum_{k=0}^{\infty} \left( \frac{1}{(k-1)!} + \frac{2}{k!} \right) \qquad \text{(with the convention $ \frac{1}{(-1)!} = 0 $)} \\
& = \sum_{j=0}^{\infty} \frac{1}{j!} + 2 \sum_{k=0}^{\infty} \frac{1}{k!} \\
& = e + 2e = 3e
\end{align*}
\begin{align*}
\sigma^2 = E({T_i}^2) - E(T_i)^2 = 3e - e^2
\end{align*}
\section{Complexity and implementation optimization} \section{Complexity and implementation optimization}
@ -430,7 +523,10 @@ then calculate the statistics (which iterates multiple times over the array).
\subsection{Optimal algorithm} \subsection{Optimal algorithm}
\cite{bin-packing-approximation:2022}
\sectionnn{Conclusion} \sectionnn{Conclusion}
\nocite{bin-packing-approximation:2022}
\nocite{hofri:1987}

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

View file

@ -20,7 +20,7 @@
%\begin{center} %\begin{center}
% -\hspace{0.25cm}Version du \today\hspace{0.25cm}- % -\hspace{0.25cm}Version du \today\hspace{0.25cm}-
%\end{center} %\end{center}
Defense on June Xth 2023 % TODO Defense on June 7th 2023
} }
\def\varinsaaddress{ \def\varinsaaddress{