Compare commits

...

7 commits

Author SHA1 Message Date
Paul ALNET
5246b1c56b tex: final commit I suppose (I wish) 2023-06-05 00:08:48 +02:00
Paul ALNET
1f279bf2b9 merge 2023-06-04 23:28:52 +02:00
Clément Lacau
839b6f79ec jjj 2023-06-04 23:12:47 +02:00
Paul ALNET
3a50f1d83d chore: des prints partout 2023-06-04 15:50:31 +02:00
Paul ALNET
7c1e115951 Merge branch 'latex' 2023-06-04 13:40:26 +02:00
Paul ALNET
7dc2616096 chore: remove unused duplicate pyplot import 2023-06-04 11:20:44 +02:00
Paul ALNET
d03273baf1 feat: easter egg 2023-06-04 08:56:49 +02:00
4 changed files with 134 additions and 23 deletions

View file

@ -1,11 +1,10 @@
#!/usr/bin/python3
from random import random
from math import floor, sqrt, factorial
from math import floor, sqrt, factorial,exp
from statistics import mean, variance
from matplotlib import pyplot as plt
from pylab import *
import numpy as np
import matplotlib.pyplot as pt
def simulate_NFBP(N):
@ -62,6 +61,10 @@ def stats_NFBP_iter(R, N):
Runs R runs of NFBP (for N items) and studies distribution, variance, mean...
Calculates stats during runtime instead of after to avoid excessive memory usage.
"""
Hmean=0
Var=[]
H=[]
Exp=0
P = R * N # Total number of items
print("## Running {} NFBP simulations with {} items".format(R, N))
# number of bins
@ -72,6 +75,7 @@ def stats_NFBP_iter(R, N):
HSumVariance = [0 for _ in range(N)]
# number of items in the i-th bin
Sum_T = [0 for _ in range(N)]
TSumVariance = [0 for _ in range(N)]
# size of the first item in the i-th bin
Sum_V = [0 for _ in range(N)]
@ -89,9 +93,16 @@ def stats_NFBP_iter(R, N):
T.append(0)
V.append(0)
Sum_T = [x + y for x, y in zip(Sum_T, T)]
TSumVariance = [x + y**2 for x, y in zip(TSumVariance, T)]
Sum_V = [x + y for x, y in zip(Sum_V, V)]
Sum_T = [x / R for x in Sum_T]
print(min(Sum_T[0:20]))
print(mean(Sum_T[0:35]))
print(Sum_T[0])
TVariance = sqrt(TSumVariance[0] / (R - 1) - Sum_T[0]**2) # Variance
print(TVariance)
Sum_V = [round(x / R, 2) for x in Sum_V]
# print(Sum_V)
I = ISum / R
@ -99,17 +110,26 @@ def stats_NFBP_iter(R, N):
print("Mean number of bins : {} (variance {})".format(I, IVariance), "\n")
# TODO clarify line below
print(" {} * {} iterations of T".format(R, N), "\n")
for n in range(min(N, 10)):
for n in range(N):
Hn = HSum[n] / R # moyenne
HVariance = sqrt(HSumVariance[n] / (R - 1) - Hn**2) # Variance
Var.append(HVariance)
H.append(Hn)
print(
"Index of bin containing the {}th item (H_{}) : {} (variance {})".format(
n, n, Hn, HVariance
)
)
print(HSum)
print(len(HSum))
for x in range(len(HSum)):
Hmean+=HSum[x]
Hmean=Hmean/P
print("Hmean is : {}".format(Hmean))
Exp=np.exp(1)
HSum = [x / R for x in HSum]
# print(HSum)
HSumVariance = [x / R for x in HSumVariance]
print(HSumVariance)
# Plotting
fig = plt.figure()
# T plot
@ -126,7 +146,7 @@ def stats_NFBP_iter(R, N):
color="red",
)
ax.set(
xlim=(0, N), xticks=np.arange(0, N), ylim=(0, 3), yticks=np.linspace(0, 3, 5)
xlim=(0, N), xticks=np.arange(0, N,N/10), ylim=(0, 3), yticks=np.linspace(0, 3, 4)
)
ax.set_ylabel("Items")
ax.set_xlabel("Bins (1-{})".format(N))
@ -144,7 +164,7 @@ def stats_NFBP_iter(R, N):
color="orange",
)
bx.set(
xlim=(0, N), xticks=np.arange(0, N), ylim=(0, 1), yticks=np.linspace(0, 1, 10)
xlim=(0, N), xticks=np.arange(0, N,N/10), ylim=(0, 1), yticks=np.linspace(0, 1, 10)
)
bx.set_ylabel("First item size")
bx.set_xlabel("Bins (1-{})".format(N))
@ -163,19 +183,23 @@ def stats_NFBP_iter(R, N):
color="green",
)
cx.set(
xlim=(0, N), xticks=np.arange(0, N), ylim=(0, 10), yticks=np.linspace(0, N, 5)
xlim=(0, N), xticks=np.arange(0, N,N/10), ylim=(0, 10), yticks=np.linspace(0, N, 5)
)
cx.set_ylabel("Bin ranking of n-item")
cx.set_xlabel("n-item (1-{})".format(N))
cx.set_title("H histogram for {} items".format(P))
xb = linspace(0, N, 10)
yb = Hn * xb / 10
wb = HVariance * xb / 10
cx.plot(xb, yb, label="Theoretical E(Hn)", color="brown")
cx.plot(xb, wb, label="Theoretical V(Hn)", color="purple")
xc=linspace(0,N,50)
yb = [Hmean for n in range(N)]
db =(( HSum[30] - HSum[1])/30)*xc
wb =(( HSumVariance[30] - HSumVariance[1])/30)*xc
cx.plot(xc, yb, label="Experimental Hn_Mean", color="brown")
cx.plot(xc, H, label="Experimental E(Hn)", color="red")
cx.plot(xc, Var, label="Experimental V(Hn)", color="purple")
cx.legend(loc="upper left", title="Legend")
plt.show()
plt.show()
def simulate_NFDBP(N):
"""
@ -212,6 +236,7 @@ def stats_NFDBP(R, N, t_i):
"""
print("## Running {} NFDBP simulations with {} items".format(R, N))
# TODO comment this function
T1=[]
P = N * R # Total number of items
I = []
H = [[] for _ in range(N)] # List of empty lists
@ -228,6 +253,7 @@ def stats_NFDBP(R, N, t_i):
for k in range(N):
T.append(0)
T = sim["T"]
T1.append(sim["T"][0])
for n in range(N):
H[n].append(sim["H"][n])
Tk[n].append(sim["T"][n])
@ -237,7 +263,7 @@ def stats_NFDBP(R, N, t_i):
Sum_T = [
x * 100 / (sum(Sum_T)) for x in Sum_T
] # Pourcentage de la repartition des items
T1=[x/100 for x in T1]
print("Mean number of bins : {} (variance {})".format(mean(I), variance(I)))
for n in range(N):
@ -251,6 +277,7 @@ def stats_NFDBP(R, N, t_i):
E = 0
sigma2 = 0
# print(T_maths)
T_maths = [x * 100 for x in T_maths]
for p in range(len(T_maths)):
E = E + (p + 1) * T_maths[p]
sigma2 = ((T_maths[p] - E) ** 2) / (len(T_maths) - 1)
@ -259,7 +286,7 @@ def stats_NFDBP(R, N, t_i):
t_i, E, sqrt(sigma2)
)
)
T_maths = [x * 100 for x in T_maths]
# T_maths = [x * 100 for x in T_maths]
# Plotting
fig = plt.figure()
# T plot
@ -315,8 +342,9 @@ def stats_NFDBP(R, N, t_i):
bx.legend(loc="upper right", title="Legend")
# Loi mathematique
print("ici")
print(T_maths)
cx = fig.add_subplot(224)
cx = fig.add_subplot(223)
cx.bar(
x,
T_maths,
@ -336,6 +364,30 @@ def stats_NFDBP(R, N, t_i):
cx.set_xlabel("Bins i=(1-{})".format(N))
cx.set_title("Theoretical T{} values in %".format(t_i))
cx.legend(loc="upper right", title="Legend")
dx = fig.add_subplot(224)
dx.hist(
T1,
bins=10,
width=1,
label="Empirical values",
edgecolor="blue",
linewidth=0.7,
color="black",
)
dx.set(
xlim=(0, 10),
xticks=np.arange(0, 10,1),
ylim=(0, 100),
yticks=np.linspace(0, 100, 10),
)
dx.set_ylabel("Number of items in T1 for {} iterations")
dx.set_xlabel("{} iterations for T{}".format(R,1))
dx.set_title(
"T{} items repartition {} items (Number of items in each bin)".format(1, P)
)
dx.legend(loc="upper right", title="Legend")
plt.show()
@ -371,3 +423,5 @@ def basic_demo():
stats_NFBP_iter(10**5, 50)
print("\n\n")
stats_NFDBP(10**3, 10, 1)
print("Don't run code you don't understand or trust without a sandbox")

View file

@ -258,12 +258,10 @@ of $ T_i $. Our calculations have yielded that $ \overline{T_1} = 1.72 $ and $
{S_N}^2 = 0.88 $. Our Student coefficient is $ t_{0.95, 2} = 2 $.
We can now calculate the Confidence Interval for $ T_1 $ for $ R = 10^5 $ simulations :
\begin{align*}
IC_{95\%}(T_1) & = \left[ 1.72 \pm 1.96 \frac{\sqrt{0.88}}{\sqrt{10^5}} \cdot 2 \right] \\
& = \left[ 1.72 \pm 0.012 \right] \\
\end{align*}
We can see that the Confidence Interval is very small, thanks to the large number of iterations.
This results in a steady curve in figure \ref{fig:graphic-NFBP-Ti-105-sim}.
@ -274,12 +272,24 @@ This results in a steady curve in figure \ref{fig:graphic-NFBP-Ti-105-sim}.
\label{fig:graphic-NFBP-Vi-105-sim}
\end{figure}
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{graphics/graphic-NFBP-Hn-105-sim}
\caption{Histogram of $ H_n $ for $ R = 10^5 $ simulations and $ N = 50 $ items (number of bins required to store $n$ items)}
\label{fig:graphic-NFBP-Hn-105-sim}
\end{figure}
\paragraph{Asymptotic behavior of $ H_n $} Finally, we analyzed how many bins
were needed to store $ n $ items. We used the numbers from the $ R = 10^5 $ simulations.
We can see in figure \ref{fig:graphic-NFBP-Hn-105-sim} that $ H_n $ is
asymptotically linear. The expected value and the variance are also displayed.
The variance also increases linearly.
\paragraph{} The Next Fit Bin Packing algorithm is a very simple algorithm
with predictable results. It is very fast, but it is not optimal.
\section{Next Fit Dual Bin Packing algorithm (NFDBP)}
@ -328,7 +338,10 @@ new constraints on the first bin can be expressed as follows :
\text{ and } & U_1 + U_2 + \ldots + U_{k} \geq 1 \qquad \text{ with } k \geq 2 \\
\end{align*}
\subsection{La giga demo}
\subsection{Building a mathematical model}
In this section we will try to determine the probabilistic law followed by $ T_i $.
Let $ k \geq 2 $. Let $ (U_n)_{n \in \mathbb{N}^*} $ be a sequence of
independent random variables with uniform distribution on $ [0, 1] $, representing
@ -343,12 +356,12 @@ bin. We have that
Let $ A_k = \{ U_1 + U_2 + \ldots + U_{k} < 1 \}$. Hence,
\begin{align}
\begin{align*}
\label{eq:prob}
P(T_i = k)
& = P(A_{k-1} \cap A_k^c) \\
& = P(A_{k-1}) - P(A_k) \qquad \text{ (as $ A_k \subset A_{k-1} $)} \\
\end{align}
\end{align*}
We will try to show that $ \forall k \geq 1 $, $ P(A_k) = \frac{1}{k!} $. To do
so, we will use induction to prove the following proposition \eqref{eq:induction},
@ -414,6 +427,18 @@ Finally, plugging this into \eqref{eq:prob} gives us
P(T_i = k) = P(A_{k-1}) - P(A_{k}) = \frac{1}{(k-1)!} - \frac{1}{k!} \qquad \forall k \geq 2
\]
\subsection{Empirical results}
We ran $ R = 10^3 $ simulations for $ N = 10 $ items. The empirical results are
similar to the mathematical model.
\begin{figure}[h]
\centering
\includegraphics[width=1.0\textwidth]{graphics/graphic-NFDBP-T1-103-sim}
\caption{Theoretical and empirical histograms of $ T_1 $ for $ R = 10^3 $ simulations and $ N = 10 $ items (number of items in the first bin)}
\label{fig:graphic-NFDBP-T1-103-sim}
\end{figure}
\subsection{Expected value of $ T_i $}
We now compute the expected value $ \mu $ and variance $ \sigma^2 $ of $ T_i $.
@ -441,6 +466,8 @@ We now compute the expected value $ \mu $ and variance $ \sigma^2 $ of $ T_i $.
\sigma^2 = E({T_i}^2) - E(T_i)^2 = 3e - 1 - e^2
\end{align*}
$ H_n $ is asymptotically normal, following a $ \mathcal{N}(\frac{N}{\mu}, \frac{N \sigma^2}{\mu^3}) $ distribution.
\section{Complexity and implementation optimization}
@ -519,13 +546,43 @@ then calculate the statistics (which iterates multiple times over the array).
between devices. Execution time and memory usage do not include the import of
libraries.}
\subsection{NFBP vs NFDBP}
\subsection{Optimal algorithm}
As we have seen, the NFDBP algorithm performs much better than the NFBP
algorithm. All the variables except V show this. More specifically, the most
relevant variable is Hn, which grows slightly more slowly in the NFDBP
algorithm than in the NFBP algorithm.
Another algorithm that we did not explore in this project is the SUBP (Skim Up
Bin Packing) algorithm. It works in the same way as the NFDBP algorithm.
However, when an item exceeds the box size, it is removed from the current bin
and placed into the next bin. This algorithm that we could not exploit is much
more efficient than both of the previous algorithms. Its main issue is that it
requires a lot of storage and higher computational capacity.
We redirect you towards this video which demonstrates why another algorithm is
actually the most efficient that we can imagine. In this video we see that the
most optimized algorithm is another version of NFBP where we sort the items
in a decreasing order before sending them into the different bins.
\clearpage
\sectionnn{Conclusion}
In this project, we explored many bin packing algorithms in 1 dimension. We
discovered how some bin packing algorithms can be really simple to implement
but also heavy data consumers, as is the case for the NFBP algorithm.
By modifying the conditions of bin packing we can upgrade our performances. For
example, the NFDBP does not allow closing the bins (which depends on the
context of this implementation). The performance analysis conclusions are the
consequences of a precise statistical and probabilistic study that we have
conducted in this project.
To go further, we could now think about the best applications of different
algorithms in real contexts, thanks to simulations.
\nocite{bin-packing-approximation:2022}
\nocite{hofri:1987}

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB