Module 4: Empirical Time Complexity

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import time

Testing Framework

In [17]:
def test(Ns_range, num_trials, gen, alg):
    """
    Run a bunch of timing tests on an algorithm
    Parameters
    ----------
    Ns_range: range
        The range of input sizes I want to try on my algorithm
    num_trials: int
        The number of experiments I perform for each input size in my range
    gen: function: N -> problem instance
        Generate a random problem for my algorithm to solve
        of a particular size
    alg: function: problem instance -> solution
        Actual algorithm to solve the problem
    """
    Ns = [] # Sizes
    avg_times = []
    for N in Ns_range:
        trials = np.zeros(num_trials)
        for i in range(num_trials):
            problem = gen(N)
            tic = time.time()
            alg(problem)
            toc = time.time()
            trials[i] = toc-tic
        Ns.append(N)
        avg_times.append(np.mean(trials))
    plt.plot(Ns, avg_times)
    plt.xlabel("Ns")
    plt.ylabel("Time (Seconds)")

Duplicate elements in arrays

Ex) Have four elements [0, 1, 2, 3]

Pairs: (0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)

In [13]:
def has_duplicates_pairs(arr):
    """
    Figure out if there is an element that's repeated
    in arr by checking all pairs
    Parameters
    ----------
    arr: list
        Array of elements
    
    Returns
    -------
    True if there is a duplicate
    False if every element is unique
    """
    duplicate = False
    for i, x in enumerate(arr):
        for j, y in enumerate(arr):
            if i != j and x == y:
                duplicate = True
    return True
In [22]:
def has_duplicates_pairs_improved(arr):
    """
    Figure out if there is an element that's repeated
    in arr by checking all pairs, taking care not to
    check a pair more than once
    Parameters
    ----------
    arr: list
        Array of elements
    
    Returns
    -------
    True if there is a duplicate
    False if every element is unique
    """
    duplicate = False
    for i, x in enumerate(arr):
        for j in range(i+1, len(arr)):
            y = arr[j]
            if x == y:
                duplicate = True
    return True
In [19]:
def gen_rand_arr(N):
    return np.random.randint(0, N*3, N)
In [6]:
gen = lambda n: np.random.randint(0, n*5, n)
test(range(100, 2000, 50), 1000, gen, has_duplicates_pairs)
test(range(100, 2000, 50), 1000, gen, has_duplicates_pairs_improved)
plt.legend(["All Pairs", "All Pairs Improved", "Sorted"])
plt.xlabel("Number of elements")
plt.ylabel("Average Time Elapsed")
plt.show()
In [ ]: