chore: lint

2023-06-04 08:27:24 +02:00 · 2023-06-04 08:27:24 +02:00 · 316c910c3a
commit 316c910c3a
parent 6bb38429d1
1 changed file with 213 additions and 132 deletions
--- a/Probas.py
+++ b/Probas.py
@@ -1,12 +1,13 @@
 #!/usr/bin/python3
 from random import random
-from math import floor, sqrt,factorial
+from math import floor, sqrt, factorial
 from statistics import mean, variance
 from matplotlib import pyplot as plt
 from pylab import *
 import numpy as np
 import matplotlib.pyplot as pt

+
 def simulate_NFBP(N):
    """
    Tries to simulate T_i, V_i and H_n for N items of random size.
@@ -32,13 +33,7 @@ def simulate_NFBP(N):
            V[i] = size
        H.append(i)

-    return {
-        "i": i,
-        "R": R,
-        "T": T,
-        "V": V,
-        "H": H
-    }
+    return {"i": i, "R": R, "T": T, "V": V, "H": H}


 # unused
@@ -61,12 +56,13 @@ def stats_NFBP(R, N):
    for n in range(N):
        print("Mean H_{} : {} (variance {})".format(n, mean(H[n]), variance(H[n])))

+
 def stats_NFBP_iter(R, N):
    """
    Runs R runs of NFBP (for N items) and studies distribution, variance, mean...
    Calculates stats during runtime instead of after to avoid excessive memory usage.
    """
-    P=R*N  # Total number of items
+    P = R * N  # Total number of items
    print("## Running {} NFBP simulations with {} items".format(R, N))
    # number of bins
    ISum = 0
@@ -75,77 +71,112 @@ def stats_NFBP_iter(R, N):
    HSum = [0 for _ in range(N)]
    HSumVariance = [0 for _ in range(N)]
    # number of items in the i-th bin
-    Sum_T=[0 for _ in range(N)]
+    Sum_T = [0 for _ in range(N)]
    # size of the first item in the i-th bin
-    Sum_V=[0 for _ in range(N)]
+    Sum_V = [0 for _ in range(N)]

    for i in range(R):
        sim = simulate_NFBP(N)
        ISum += sim["i"]
-        IVarianceSum += sim["i"]**2
+        IVarianceSum += sim["i"] ** 2
        for n in range(N):
            HSum[n] += sim["H"][n]
-            HSumVariance[n] += sim["H"][n]**2
-        T=sim['T']
-        V=sim['V']
+            HSumVariance[n] += sim["H"][n] ** 2
+        T = sim["T"]
+        V = sim["V"]
        # ensure that T, V have the same length as Sum_T, Sum_V
-        for i in range(N - sim['i']):
+        for i in range(N - sim["i"]):
            T.append(0)
            V.append(0)
-        Sum_T=[x+y for x,y in zip(Sum_T,T)]
-        Sum_V=[x+y for x,y in zip(Sum_V,V)]
+        Sum_T = [x + y for x, y in zip(Sum_T, T)]
+        Sum_V = [x + y for x, y in zip(Sum_V, V)]

-    Sum_T=[x/R for x in Sum_T]
-    Sum_V=[round(x/R,2) for x in Sum_V]
-    #print(Sum_V)
-    I = ISum/R
-    IVariance = sqrt(IVarianceSum/(R-1) - I**2)
-    print("Mean number of bins : {} (variance {})".format(I, IVariance),'\n')
+    Sum_T = [x / R for x in Sum_T]
+    Sum_V = [round(x / R, 2) for x in Sum_V]
+    # print(Sum_V)
+    I = ISum / R
+    IVariance = sqrt(IVarianceSum / (R - 1) - I**2)
+    print("Mean number of bins : {} (variance {})".format(I, IVariance), "\n")
    # TODO clarify line below
-    print(" {} * {} iterations of T".format(R,N),'\n')
+    print(" {} * {} iterations of T".format(R, N), "\n")

    for n in range(min(N, 10)):
-        Hn = HSum[n]/R # moyenne
-        HVariance = sqrt(HSumVariance[n]/(R-1) - Hn**2) # Variance
-        print("Index of bin containing the {}th item (H_{}) : {} (variance {})".format(n, n, Hn, HVariance))
-    HSum=[x/R for x in HSum]
+        Hn = HSum[n] / R  # moyenne
+        HVariance = sqrt(HSumVariance[n] / (R - 1) - Hn**2)  # Variance
+        print(
+            "Index of bin containing the {}th item (H_{}) : {} (variance {})".format(
+                n, n, Hn, HVariance
+            )
+        )
+    HSum = [x / R for x in HSum]
    # print(HSum)
-#Plotting
+    # Plotting
    fig = plt.figure()
-    #T plot
+    # T plot
    x = np.arange(N)
    # print(x)
    ax = fig.add_subplot(221)
-    ax.bar(x,Sum_T, width=1,label='Empirical values', edgecolor="blue", linewidth=0.7,color='red')
-    ax.set(xlim=(0, N), xticks=np.arange(0, N),ylim=(0,3), yticks=np.linspace(0, 3, 5))
-    ax.set_ylabel('Items')
-    ax.set_xlabel('Bins (1-{})'.format(N))
-    ax.set_title('T histogram for {} items (Number of items in each bin)'.format(P))
-    ax.legend(loc='upper left',title='Legend')
-    #V plot
+    ax.bar(
+        x,
+        Sum_T,
+        width=1,
+        label="Empirical values",
+        edgecolor="blue",
+        linewidth=0.7,
+        color="red",
+    )
+    ax.set(
+        xlim=(0, N), xticks=np.arange(0, N), ylim=(0, 3), yticks=np.linspace(0, 3, 5)
+    )
+    ax.set_ylabel("Items")
+    ax.set_xlabel("Bins (1-{})".format(N))
+    ax.set_title("T histogram for {} items (Number of items in each bin)".format(P))
+    ax.legend(loc="upper left", title="Legend")
+    # V plot
    bx = fig.add_subplot(222)
-    bx.bar(x,Sum_V, width=1,label='Empirical values', edgecolor="blue", linewidth=0.7,color='orange')
-    bx.set(xlim=(0, N), xticks=np.arange(0, N),ylim=(0, 1), yticks=np.linspace(0, 1, 10))
-    bx.set_ylabel('First item size')
-    bx.set_xlabel('Bins (1-{})'.format(N))
-    bx.set_title('V histogram for {} items (first item size of each bin)'.format(P))
-    bx.legend(loc='upper left',title='Legend')
-    #H plot
-    #We will simulate this part for a asymptotic study
+    bx.bar(
+        x,
+        Sum_V,
+        width=1,
+        label="Empirical values",
+        edgecolor="blue",
+        linewidth=0.7,
+        color="orange",
+    )
+    bx.set(
+        xlim=(0, N), xticks=np.arange(0, N), ylim=(0, 1), yticks=np.linspace(0, 1, 10)
+    )
+    bx.set_ylabel("First item size")
+    bx.set_xlabel("Bins (1-{})".format(N))
+    bx.set_title("V histogram for {} items (first item size of each bin)".format(P))
+    bx.legend(loc="upper left", title="Legend")
+    # H plot
+    # We will simulate this part for a asymptotic study
    cx = fig.add_subplot(223)
-    cx.bar(x,HSum, width=1,label='Empirical values', edgecolor="blue", linewidth=0.7,color='green')
-    cx.set(xlim=(0, N), xticks=np.arange(0, N),ylim=(0, 10), yticks=np.linspace(0, N, 5))
-    cx.set_ylabel('Bin ranking of n-item')
-    cx.set_xlabel('n-item (1-{})'.format(N))
-    cx.set_title('H histogram for {} items'.format(P))
-    xb=linspace(0,N,10)
-    yb=Hn*xb/10
-    wb=HVariance*xb/10
-    cx.plot(xb,yb,label='Theoretical E(Hn)',color='brown')
-    cx.plot(xb,wb,label='Theoretical V(Hn)',color='purple')
-    cx.legend(loc='upper left',title='Legend')
+    cx.bar(
+        x,
+        HSum,
+        width=1,
+        label="Empirical values",
+        edgecolor="blue",
+        linewidth=0.7,
+        color="green",
+    )
+    cx.set(
+        xlim=(0, N), xticks=np.arange(0, N), ylim=(0, 10), yticks=np.linspace(0, N, 5)
+    )
+    cx.set_ylabel("Bin ranking of n-item")
+    cx.set_xlabel("n-item (1-{})".format(N))
+    cx.set_title("H histogram for {} items".format(P))
+    xb = linspace(0, N, 10)
+    yb = Hn * xb / 10
+    wb = HVariance * xb / 10
+    cx.plot(xb, yb, label="Theoretical E(Hn)", color="brown")
+    cx.plot(xb, wb, label="Theoretical V(Hn)", color="purple")
+    cx.legend(loc="upper left", title="Legend")
    plt.show()

+
 def simulate_NFDBP(N):
    """
    Tries to simulate T_i, V_i and H_n for N items of random size.
@@ -172,119 +203,169 @@ def simulate_NFDBP(N):
        R[i] += size
        T[i] += 1

-    return {
-        "i": i,
-        "R": R,
-        "T": T,
-        "V": V,
-        "H": H
-    }
+    return {"i": i, "R": R, "T": T, "V": V, "H": H}


-def stats_NFDBP(R, N,t_i):
+def stats_NFDBP(R, N, t_i):
    """
    Runs R runs of NFDBP (for N items) and studies distribution, variance, mean...
    """
    print("## Running {} NFDBP simulations with {} items".format(R, N))
-    P=N*R  # Total number of items
+    # TODO comment this function
+    P = N * R  # Total number of items
    I = []
    H = [[] for _ in range(N)]  # List of empty lists
-    T=[]
-    Tk=[[] for _ in range(N)]
-    Ti=[]
-    T_maths=[]
-    #First iteration to use zip after
-    sim=simulate_NFDBP(N)
-    Sum_T=[0 for _ in range(N)]
+    T = []
+    Tk = [[] for _ in range(N)]
+    Ti = []
+    T_maths = []
+    # First iteration to use zip after
+    sim = simulate_NFDBP(N)
+    Sum_T = [0 for _ in range(N)]
    for i in range(R):
        sim = simulate_NFDBP(N)
        I.append(sim["i"])
        for k in range(N):
            T.append(0)
-            T=sim["T"]
+            T = sim["T"]
        for n in range(N):
            H[n].append(sim["H"][n])
            Tk[n].append(sim["T"][n])
        Ti.append(sim["T"])
-        Sum_T=[x+y for x,y in zip(Sum_T,T)]
-    Sum_T=[x/R for x in Sum_T] #Experimental [Ti=k]
-    Sum_T=[x*100/(sum(Sum_T)) for x in Sum_T] #Pourcentage de la repartition des items 
+        Sum_T = [x + y for x, y in zip(Sum_T, T)]
+    Sum_T = [x / R for x in Sum_T]  # Experimental [Ti=k]
+    Sum_T = [
+        x * 100 / (sum(Sum_T)) for x in Sum_T
+    ]  # Pourcentage de la repartition des items

    print("Mean number of bins : {} (variance {})".format(mean(I), variance(I)))

    for n in range(N):
        print("Mean H_{} : {} (variance {})".format(n, mean(H[n]), variance(H[n])))
    print("Mean T_{} : {} (variance {})".format(k, mean(Sum_T), variance(Sum_T)))
-    #Loi math
+    # Loi math
    for u in range(N):
-        u=u+2
-        T_maths.append(1/(factorial(u-1))-1/factorial(u))
-    E=0
-    sigma2=0
+        u = u + 2
+        T_maths.append(1 / (factorial(u - 1)) - 1 / factorial(u))
+    E = 0
+    sigma2 = 0
    # print(T_maths)
    for p in range(len(T_maths)):
-        E=E+(p+1)*T_maths[p]
-        sigma2=((T_maths[p]-E)**2)/(len(T_maths)-1)
-    print("Mathematical values : Empiric mean T_{} : {} Variance {})".format(t_i, E, sqrt(sigma2)))
-    T_maths=[x*100 for x in T_maths]
-    #Plotting
+        E = E + (p + 1) * T_maths[p]
+        sigma2 = ((T_maths[p] - E) ** 2) / (len(T_maths) - 1)
+    print(
+        "Mathematical values : Empiric mean T_{} : {} Variance {})".format(
+            t_i, E, sqrt(sigma2)
+        )
+    )
+    T_maths = [x * 100 for x in T_maths]
+    # Plotting
    fig = plt.figure()
-    #T plot
+    # T plot
    x = np.arange(N)
    print(x)
    print(Sum_T)
    ax = fig.add_subplot(221)
-    ax.bar(x,Sum_T, width=1,label='Empirical values', edgecolor="blue", linewidth=0.7,color='red')
-    ax.set(xlim=(0, N), xticks=np.arange(0, N),ylim=(0,20), yticks=np.linspace(0, 20, 2))
-    ax.set_ylabel('Items(n) in %')
-    ax.set_xlabel('Bins (1-{})'.format(N))
-    ax.set_title('Items percentage for each bin and {} items (Number of items in each bin)'.format(P))
-    ax.legend(loc='upper left',title='Legend')
+    ax.bar(
+        x,
+        Sum_T,
+        width=1,
+        label="Empirical values",
+        edgecolor="blue",
+        linewidth=0.7,
+        color="red",
+    )
+    ax.set(
+        xlim=(0, N), xticks=np.arange(0, N), ylim=(0, 20), yticks=np.linspace(0, 20, 2)
+    )
+    ax.set_ylabel("Items(n) in %")
+    ax.set_xlabel("Bins (1-{})".format(N))
+    ax.set_title(
+        "Items percentage for each bin and {} items (Number of items in each bin)".format(
+            P
+        )
+    )
+    ax.legend(loc="upper left", title="Legend")

-    #Mathematical P(Ti=k) plot. It shows the Ti(t_i) law with the probability of each number of items.
+    # Mathematical P(Ti=k) plot. It shows the Ti(t_i) law with the probability of each number of items.
    print(len(Tk[t_i]))
    bx = fig.add_subplot(222)
-    bx.hist(Tk[t_i],bins=10, width=1,label='Empirical values', edgecolor="blue", linewidth=0.7,color='red')
-    bx.set(xlim=(0, N), xticks=np.arange(0, N),ylim=(0,len(Tk[t_i])), yticks=np.linspace(0, 1, 1))
-    bx.set_ylabel('P(T{}=i)'.format(t_i))
-    bx.set_xlabel('Bins i=(1-{}) in %'.format(N))
-    bx.set_title('T{} histogram for {} items (Number of items in each bin)'.format(t_i,P))
-    bx.legend(loc='upper left',title='Legend')
+    bx.hist(
+        Tk[t_i],
+        bins=10,
+        width=1,
+        label="Empirical values",
+        edgecolor="blue",
+        linewidth=0.7,
+        color="red",
+    )
+    bx.set(
+        xlim=(0, N),
+        xticks=np.arange(0, N),
+        ylim=(0, len(Tk[t_i])),
+        yticks=np.linspace(0, 1, 1),
+    )
+    bx.set_ylabel("P(T{}=i)".format(t_i))
+    bx.set_xlabel("Bins i=(1-{}) in %".format(N))
+    bx.set_title(
+        "T{} histogram for {} items (Number of items in each bin)".format(t_i, P)
+    )
+    bx.legend(loc="upper left", title="Legend")

-    #Loi mathematique
+    # Loi mathematique
    print(T_maths)
    cx = fig.add_subplot(224)
-    cx.bar(x,T_maths, width=1,label='Theoretical values', edgecolor="blue", linewidth=0.7,color='red')
-    cx.set(xlim=(0, N), xticks=np.arange(0, N),ylim=(0,100), yticks=np.linspace(0, 100, 10))
-    cx.set_ylabel('P(T{}=i)'.format(t_i))
-    cx.set_xlabel('Bins i=(1-{})'.format(N))
-    cx.set_title('Theoretical T{} values in %'.format(t_i))
-    cx.legend(loc='upper left',title='Legend')
+    cx.bar(
+        x,
+        T_maths,
+        width=1,
+        label="Theoretical values",
+        edgecolor="blue",
+        linewidth=0.7,
+        color="red",
+    )
+    cx.set(
+        xlim=(0, N),
+        xticks=np.arange(0, N),
+        ylim=(0, 100),
+        yticks=np.linspace(0, 100, 10),
+    )
+    cx.set_ylabel("P(T{}=i)".format(t_i))
+    cx.set_xlabel("Bins i=(1-{})".format(N))
+    cx.set_title("Theoretical T{} values in %".format(t_i))
+    cx.legend(loc="upper left", title="Legend")
    plt.show()

+
 # unused
 def basic_demo():
-    N = 10 ** 1
+    N = 10**1
    sim = simulate_NFBP(N)

    print("Simulation NFBP pour {} packaets. Contenu des boites :".format(N))
    for j in range(sim["i"] + 1):
        remplissage = floor(sim["R"][j] * 100)
-        print("Boite {} : Rempli à {} % avec {} paquets. Taille du premier paquet : {}".format(j, remplissage, sim["T"][j],
-                                                                                               sim["V"][j]))
+        print(
+            "Boite {} : Rempli à {} % avec {} paquets. Taille du premier paquet : {}".format(
+                j, remplissage, sim["T"][j], sim["V"][j]
+            )
+        )

    print()
-    stats_NFBP(10 ** 3, 10)
+    stats_NFBP(10**3, 10)

-    N = 10 ** 1
+    N = 10**1
    sim = simulate_NFDBP(N)
    print("Simulation NFDBP pour {} packaets. Contenu des boites :".format(N))
    for j in range(sim["i"] + 1):
        remplissage = floor(sim["R"][j] * 100)
-        print("Boite {} : Rempli à {} % avec {} paquets. Taille du premier paquet : {}".format(j, remplissage,
-                                                                                                   sim["T"][j],
-                                                                                                   sim["V"][j]))
+        print(
+            "Boite {} : Rempli à {} % avec {} paquets. Taille du premier paquet : {}".format(
+                j, remplissage, sim["T"][j], sim["V"][j]
+            )
+        )
+

 stats_NFBP_iter(10**3, 10)
-print('\n\n')
-stats_NFDBP(10 ** 3, 10,1)
+print("\n\n")
+stats_NFDBP(10**3, 10, 1)