Compare commits


No commits in common. "master" and "latex" have entirely different histories.

4 changed files with 23 additions and 134 deletions


@@ -1,10 +1,11 @@
#!/usr/bin/python3
from random import random
from math import floor, sqrt, factorial,exp
from math import floor, sqrt, factorial
from statistics import mean, variance
from matplotlib import pyplot as plt
from pylab import *
import numpy as np
import matplotlib.pyplot as pt
def simulate_NFBP(N):
@@ -61,10 +62,6 @@ def stats_NFBP_iter(R, N):
Performs R runs of NFBP (for N items) and studies the distribution, variance, mean...
Calculates stats during the run instead of afterwards, to avoid excessive memory usage.
"""
Hmean=0
Var=[]
H=[]
Exp=0
P = R * N # Total number of items
print("## Running {} NFBP simulations with {} items".format(R, N))
# number of bins
@@ -75,7 +72,6 @@ def stats_NFBP_iter(R, N):
HSumVariance = [0 for _ in range(N)]
# number of items in the i-th bin
Sum_T = [0 for _ in range(N)]
TSumVariance = [0 for _ in range(N)]
# size of the first item in the i-th bin
Sum_V = [0 for _ in range(N)]
@@ -93,16 +89,9 @@ def stats_NFBP_iter(R, N):
T.append(0)
V.append(0)
Sum_T = [x + y for x, y in zip(Sum_T, T)]
TSumVariance = [x + y**2 for x, y in zip(TSumVariance, T)]
Sum_V = [x + y for x, y in zip(Sum_V, V)]
Sum_T = [x / R for x in Sum_T]
print(min(Sum_T[0:20]))
print(mean(Sum_T[0:35]))
print(Sum_T[0])
TVariance = sqrt(TSumVariance[0] / (R - 1) - Sum_T[0]**2) # Variance
print(TVariance)
Sum_V = [round(x / R, 2) for x in Sum_V]
# print(Sum_V)
I = ISum / R
@@ -110,26 +99,17 @@ def stats_NFBP_iter(R, N):
print("Mean number of bins : {} (variance {})".format(I, IVariance), "\n")
# TODO clarify line below
print(" {} * {} iterations of T".format(R, N), "\n")
for n in range(N):
for n in range(min(N, 10)):
Hn = HSum[n] / R  # mean
HVariance = sqrt(HSumVariance[n] / (R - 1) - Hn**2) # Variance
Var.append(HVariance)
H.append(Hn)
print(
"Index of bin containing the {}th item (H_{}) : {} (variance {})".format(
n, n, Hn, HVariance
)
)
print(HSum)
print(len(HSum))
for x in range(len(HSum)):
Hmean+=HSum[x]
Hmean=Hmean/P
print("Hmean is : {}".format(Hmean))
Exp=np.exp(1)
HSum = [x / R for x in HSum]
HSumVariance = [x / R for x in HSumVariance]
print(HSumVariance)
# print(HSum)
# Plotting
fig = plt.figure()
# T plot
@@ -146,7 +126,7 @@ def stats_NFBP_iter(R, N):
color="red",
)
ax.set(
xlim=(0, N), xticks=np.arange(0, N,N/10), ylim=(0, 3), yticks=np.linspace(0, 3, 4)
xlim=(0, N), xticks=np.arange(0, N), ylim=(0, 3), yticks=np.linspace(0, 3, 5)
)
ax.set_ylabel("Items")
ax.set_xlabel("Bins (1-{})".format(N))
@@ -164,7 +144,7 @@ def stats_NFBP_iter(R, N):
color="orange",
)
bx.set(
xlim=(0, N), xticks=np.arange(0, N,N/10), ylim=(0, 1), yticks=np.linspace(0, 1, 10)
xlim=(0, N), xticks=np.arange(0, N), ylim=(0, 1), yticks=np.linspace(0, 1, 10)
)
bx.set_ylabel("First item size")
bx.set_xlabel("Bins (1-{})".format(N))
@@ -183,24 +163,20 @@ def stats_NFBP_iter(R, N):
color="green",
)
cx.set(
xlim=(0, N), xticks=np.arange(0, N,N/10), ylim=(0, 10), yticks=np.linspace(0, N, 5)
xlim=(0, N), xticks=np.arange(0, N), ylim=(0, 10), yticks=np.linspace(0, N, 5)
)
cx.set_ylabel("Bin ranking of n-item")
cx.set_xlabel("n-item (1-{})".format(N))
cx.set_title("H histogram for {} items".format(P))
xb = linspace(0, N, 10)
xc=linspace(0,N,50)
yb = [Hmean for n in range(N)]
db =(( HSum[30] - HSum[1])/30)*xc
wb =(( HSumVariance[30] - HSumVariance[1])/30)*xc
cx.plot(xc, yb, label="Experimental Hn_Mean", color="brown")
cx.plot(xc, H, label="Experimental E(Hn)", color="red")
cx.plot(xc, Var, label="Experimental V(Hn)", color="purple")
yb = Hn * xb / 10
wb = HVariance * xb / 10
cx.plot(xb, yb, label="Theoretical E(Hn)", color="brown")
cx.plot(xb, wb, label="Theoretical V(Hn)", color="purple")
cx.legend(loc="upper left", title="Legend")
plt.show()
def simulate_NFDBP(N):
"""
Tries to simulate T_i, V_i and H_n for N items of random size.
@@ -236,7 +212,6 @@ def stats_NFDBP(R, N, t_i):
"""
print("## Running {} NFDBP simulations with {} items".format(R, N))
# TODO comment this function
T1=[]
P = N * R # Total number of items
I = []
H = [[] for _ in range(N)] # List of empty lists
@@ -253,7 +228,6 @@ def stats_NFDBP(R, N, t_i):
for k in range(N):
T.append(0)
T = sim["T"]
T1.append(sim["T"][0])
for n in range(N):
H[n].append(sim["H"][n])
Tk[n].append(sim["T"][n])
@@ -263,7 +237,7 @@ def stats_NFDBP(R, N, t_i):
Sum_T = [
x * 100 / (sum(Sum_T)) for x in Sum_T
] # Percentage of the item distribution
T1=[x/100 for x in T1]
print("Mean number of bins : {} (variance {})".format(mean(I), variance(I)))
for n in range(N):
@@ -277,7 +251,6 @@ def stats_NFDBP(R, N, t_i):
E = 0
sigma2 = 0
# print(T_maths)
T_maths = [x * 100 for x in T_maths]
for p in range(len(T_maths)):
E = E + (p + 1) * T_maths[p]
sigma2 = ((T_maths[p] - E) ** 2) / (len(T_maths) - 1)
@@ -286,7 +259,7 @@ def stats_NFDBP(R, N, t_i):
t_i, E, sqrt(sigma2)
)
)
# T_maths = [x * 100 for x in T_maths]
T_maths = [x * 100 for x in T_maths]
# Plotting
fig = plt.figure()
# T plot
@@ -342,9 +315,8 @@ def stats_NFDBP(R, N, t_i):
bx.legend(loc="upper right", title="Legend")
# Mathematical law
print("ici")
print(T_maths)
cx = fig.add_subplot(223)
cx = fig.add_subplot(224)
cx.bar(
x,
T_maths,
@@ -364,30 +336,6 @@ def stats_NFDBP(R, N, t_i):
cx.set_xlabel("Bins i=(1-{})".format(N))
cx.set_title("Theoretical T{} values in %".format(t_i))
cx.legend(loc="upper right", title="Legend")
dx = fig.add_subplot(224)
dx.hist(
T1,
bins=10,
width=1,
label="Empirical values",
edgecolor="blue",
linewidth=0.7,
color="black",
)
dx.set(
xlim=(0, 10),
xticks=np.arange(0, 10,1),
ylim=(0, 100),
yticks=np.linspace(0, 100, 10),
)
dx.set_ylabel("Number of items in T1 for {} iterations")
dx.set_xlabel("{} iterations for T{}".format(R,1))
dx.set_title(
"T{} items repartition {} items (Number of items in each bin)".format(1, P)
)
dx.legend(loc="upper right", title="Legend")
plt.show()
@@ -423,5 +371,3 @@ def basic_demo():
stats_NFBP_iter(10**5, 50)
print("\n\n")
stats_NFDBP(10**3, 10, 1)
print("Don't run code you don't understand or trust without a sandbox")


@@ -258,10 +258,12 @@ of $ T_i $. Our calculations have yielded that $ \overline{T_1} = 1.72 $ and $
{S_N}^2 = 0.88 $. Our Student coefficient is $ t_{0.95, 2} = 2 $.
We can now calculate the Confidence Interval for $ T_1 $ for $ R = 10^5 $ simulations:
\begin{align*}
IC_{95\%}(T_1) & = \left[ 1.72 \pm 2 \cdot \frac{\sqrt{0.88}}{\sqrt{10^5}} \right] \\
& = \left[ 1.72 \pm 0.006 \right] \\
\end{align*}
We can see that the Confidence Interval is very small, thanks to the large number of iterations.
This results in a steady curve in figure \ref{fig:graphic-NFBP-Ti-105-sim}.
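As a quick check, the same interval can be computed in a few lines of Python (a sketch using the figures above, not part of the repository's scripts):

from math import sqrt

R = 10**5        # number of simulations
mean_T1 = 1.72   # sample mean of T_1
var_T1 = 0.88    # sample variance S_N^2
t = 2            # Student coefficient t_{0.95}

half_width = t * sqrt(var_T1) / sqrt(R)
print("IC_95%(T_1) = [{:.3f}, {:.3f}]".format(mean_T1 - half_width, mean_T1 + half_width))
# IC_95%(T_1) = [1.714, 1.726]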
@@ -272,24 +274,12 @@ This results in a steady curve in figure \ref{fig:graphic-NFBP-Ti-105-sim}.
\label{fig:graphic-NFBP-Vi-105-sim}
\end{figure}
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{graphics/graphic-NFBP-Hn-105-sim}
\caption{Histogram of $ H_n $ for $ R = 10^5 $ simulations and $ N = 50 $ items (number of bins required to store $n$ items)}
\label{fig:graphic-NFBP-Hn-105-sim}
\end{figure}
\paragraph{Asymptotic behavior of $ H_n $} Finally, we analyzed how many bins
were needed to store $ n $ items. We used the numbers from the $ R = 10^5 $ simulations.
We can see in figure \ref{fig:graphic-NFBP-Hn-105-sim} that $ H_n $ is
asymptotically linear. The expected value and the variance are also displayed.
The variance also increases linearly.
\paragraph{} The Next Fit Bin Packing algorithm is a very simple algorithm
with predictable results. It is very fast, but it is not optimal.
\section{Next Fit Dual Bin Packing algorithm (NFDBP)}
@@ -338,10 +328,7 @@ new constraints on the first bin can be expressed as follows:
\text{ and } & U_1 + U_2 + \ldots + U_{k} \geq 1 \qquad \text{ with } k \geq 2 \\
\end{align*}
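These constraints translate directly into a sampler for $ T_i $: draw uniforms until the running sum reaches 1. A minimal sketch (sample_T is an illustrative helper, distinct from the simulate_NFDBP of the Python script):

from random import random

def sample_T():
    # Number of uniform draws needed for the running sum to reach 1.
    total, k = 0.0, 0
    while total < 1.0:
        total += random()
        k += 1
    return k  # always >= 2, since a single uniform draw is < 1 almost surely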
\subsection{Building a mathematical model}
In this section we will try to determine the probabilistic law followed by $ T_i $.
\subsection{The big proof}
Let $ k \geq 2 $. Let $ (U_n)_{n \in \mathbb{N}^*} $ be a sequence of
independent random variables with uniform distribution on $ [0, 1] $, representing
@@ -356,12 +343,12 @@ bin. We have that
Let $ A_k = \{ U_1 + U_2 + \ldots + U_{k} < 1 \}$. Hence,
\begin{align*}
\begin{align}
\label{eq:prob}
P(T_i = k)
& = P(A_{k-1} \cap A_k^c) \\
& = P(A_{k-1}) - P(A_k) \qquad \text{ (as $ A_k \subset A_{k-1} $)} \\
\end{align*}
\end{align}
We will try to show that $ \forall k \geq 1 $, $ P(A_k) = \frac{1}{k!} $. To do
so, we will use induction to prove the following proposition \eqref{eq:induction},
@@ -427,18 +414,6 @@ Finally, plugging this into \eqref{eq:prob} gives us
P(T_i = k) = P(A_{k-1}) - P(A_{k}) = \frac{1}{(k-1)!} - \frac{1}{k!} \qquad \forall k \geq 2
\]
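As a quick check, the first values of this law are
\[
P(T_i = 2) = 1 - \frac{1}{2} = \frac{1}{2}, \qquad
P(T_i = 3) = \frac{1}{2} - \frac{1}{6} = \frac{1}{3}, \qquad
P(T_i = 4) = \frac{1}{6} - \frac{1}{24} = \frac{1}{8},
\]
and the sum telescopes to $ \sum_{k \geq 2} P(T_i = k) = \frac{1}{1!} = 1 $, so this is indeed a probability distribution.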
\subsection{Empirical results}
We ran $ R = 10^3 $ simulations for $ N = 10 $ items. The empirical results
closely match the mathematical model.
\begin{figure}[h]
\centering
\includegraphics[width=1.0\textwidth]{graphics/graphic-NFDBP-T1-103-sim}
\caption{Theoretical and empirical histograms of $ T_1 $ for $ R = 10^3 $ simulations and $ N = 10 $ items (number of items in the first bin)}
\label{fig:graphic-NFDBP-T1-103-sim}
\end{figure}
\subsection{Expected value of $ T_i $}
We now compute the expected value $ \mu $ and variance $ \sigma^2 $ of $ T_i $.
@@ -466,8 +441,6 @@ We now compute the expected value $ \mu $ and variance $ \sigma^2 $ of $ T_i $.
\sigma^2 = E({T_i}^2) - E(T_i)^2 = 3e - e^2
\end{align*}
$ H_n $ is asymptotically normal, following a $ \mathcal{N}(\frac{N}{\mu}, \frac{N \sigma^2}{\mu^3}) $ distribution.
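Both moments are easy to verify numerically from the law of $ T_i $ (a sketch; truncating the series at k = 30 is ample given the factorial decay):

from math import e, factorial

E1 = sum(k * (1 / factorial(k - 1) - 1 / factorial(k)) for k in range(2, 30))
E2 = sum(k**2 * (1 / factorial(k - 1) - 1 / factorial(k)) for k in range(2, 30))
print(E1, e)                     # both ~2.71828
print(E2 - E1**2, 3 * e - e**2)  # both ~0.76579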
\section{Complexity and implementation optimization}
@@ -546,43 +519,13 @@ then calculate the statistics (which iterates multiple times over the array).
between devices. Execution time and memory usage do not include the import of
libraries.}
\subsection{NFBP vs NFDBP}
\subsection{Optimal algorithm}
As we have seen, the NFDBP algorithm performs much better than the NFBP
algorithm. All the variables except $ V $ show this. More specifically, the
most relevant variable is $ H_n $, which grows slightly more slowly under
NFDBP than under NFBP.
Another algorithm that we did not explore in this project is SUBP (Skim Up
Bin Packing). It works in the same way as the NFDBP algorithm, except that
when an item overflows the bin, it is removed from the current bin and placed
into the next one. This algorithm, which we could not exploit here, is much
more efficient than both of the previous algorithms. Its main drawback is that
it uses a lot of storage and requires greater computing capacity.
We refer the reader to this video, which demonstrates why yet another
algorithm is actually the most efficient one imaginable: a version of NFBP in
which the items are sorted in decreasing order before being dispatched into
the bins.
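A minimal sketch of that sort-first variant, assuming a bin capacity of 1 as in the rest of this project (next_fit is an illustrative re-implementation, not the repository's code):

def next_fit(items):
    # Plain NFBP: close the current bin as soon as an item does not fit.
    bins, current, space = [], [], 1.0
    for size in items:
        if size > space:
            bins.append(current)
            current, space = [], 1.0
        current.append(size)
        space -= size
    bins.append(current)
    return bins

def next_fit_decreasing(items):
    # The sorted variant: largest items first, then plain NFBP.
    return next_fit(sorted(items, reverse=True))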
\clearpage
\sectionnn{Conclusion}
In this project, we explored several one-dimensional bin packing algorithms.
We discovered that a bin packing algorithm can be very simple to implement and
yet, like NFBP, consume a lot of resources.
By modifying the bin packing conditions, we can improve performance. For
example, NFDBP does not allow the bins to be closed (whether that is
acceptable depends on the context of the implementation). The conclusions of
our performance analysis follow from the detailed statistical and
probabilistic study that we conducted in this project.
To go further, we could now use simulations to determine the best applications
of the different algorithms in real contexts.
\nocite{bin-packing-approximation:2022}
\nocite{hofri:1987}

Two binary image files (29 KiB and 38 KiB) were removed and are not shown.