#!/usr/bin/env python # Demonstration of correct and incorrect shuffling algorithms # Stan Seibert http://mtrr.org/blog/?p=94 import copy import operator from math import sqrt def swap(i, j, seq): '''Return a new sequence with elements i and j exchanged''' new_seq = copy.copy(seq) tmp = new_seq[i] new_seq[i] = new_seq[j] new_seq[j] = tmp return new_seq def swap_incorrect(i, seq): '''Return a list of sequences with all possible swaps of element i in seq with any other element.''' return [ swap(i,j,seq) for j in range(len(seq)) ] def swap_correct(i, seq): '''Return a list of sequences with all possible swaps of element i in seq with elements after and including element i.''' return [ swap(i,j,seq) for j in range(i, len(seq)) ] def swap_nearly_correct(i, seq): '''Return a list of sequences with all possible swaps of element i in seq with elements after but not including including element i.''' if i == len(seq) - 1: return [seq] else: return [ swap(i,j,seq) for j in range(i+1, len(seq)) ] def do_shuffle(seq, swap_func=swap_correct): '''Returns all possible shuffles with the provided swapping function.''' seq_list = [seq] for i in range(len(seq)): next_list = [] for seq in seq_list: next_list += swap_func(i, seq) seq_list = next_list return seq_list def count_freq(seq_list): '''Takes a list of sequences and returns a frequency dictionary''' freq = {} for seq in seq_list: key = tuple(seq) if key not in freq: freq[key] = 1 else: freq[key] += 1 return freq def freq_stats(freq): '''Returns mean and sample standard deviation of the frequencies in freq dict.''' n = len(freq) mom1 = 0 mom2 = 0 for count in freq.values(): mom1 += count mom2 += count**2 return mom1/float(n), sqrt( (n*mom2 - mom1**2) / (n*(n-1)) ) def least_common_sequence(freq, limit=5): '''Returns the least common sequences in the frequency table as a list. If several sequences have the same frequency, all are returned up to the specified limit ''' min_freq = 1e200 min_freq_keys = [] for key, count in freq.items(): if count == min_freq: min_freq_keys.append(key) elif count < min_freq: min_freq = count min_freq_keys = [key] return min_freq_keys[:limit] def tuple_to_str(t): '''Converts a tuple of strings to a single string through concatenation''' return reduce(operator.add, t) ###### if __name__ == '__main__': n = 7 # this is probably as big as you want to go starting_sequence = [chr(i) for i in range(ord('A'), ord('A') + n)] for swap_func in swap_incorrect, swap_nearly_correct, swap_correct: shuffle_seq = do_shuffle(starting_sequence, swap_func) freq = count_freq(shuffle_seq) mean, stdev = freq_stats(freq) lcs = least_common_sequence(freq, limit=None) if len(lcs) == len(shuffle_seq): lcs = '(all)' # Don't print least common sequences if they are all the same else: lcs = map(tuple_to_str, lcs) print '%20s: orderings, total = %d unique = %d' \ % (swap_func.__name__, len(shuffle_seq), len(freq)) print ' '*20,' repetitions = %1.0f +/- %1.0f' % (mean, stdev) print ' '*20,' variation = %1.3f%%' % (stdev/mean*100.0,) print ' '*20,' least common seqs =', str(lcs) print