Python itertools

why itertools?

  • itertools is a built-in module in Python for handling iterables.
  • It provides a number of fast, memory-efficient ways of looping through iterables to achieve different desired results.
  • It is a powerful yet under-rated module that every data scientist should know in order to write clean, elegant and readable code in Python.

Infinite iterators

count()

  • count is an infinite iterator
  • it takes two arguments start, step
  • By default start=0 and step=1
  • It will continuously generate the numbers indefinitely unless we stop it.
from itertools import count

def count_5():
    """Print the integers 0 through 5, one per line.

    The values are pulled from an unbounded ``count()`` iterator, so the
    function has to stop explicitly once 5 has been printed.
    """
    numbers = count()
    while True:
        current = next(numbers)
        print(current)
        if current == 5:
            # count() would go on forever; leave as soon as 5 is shown.
            return

def five_multiples_5():
    """Print the first five multiples of 5 (5, 10, 15, 20, 25), one per line.

    The multiples come from ``count(start=5, step=5)``, which never ends by
    itself, so the number of printed values is tracked to stop after five.
    """
    multiples = count(start=5, step=5)
    for printed, multiple in enumerate(multiples, start=1):
        print(multiple)
        if printed == 5:
            break
# Run the default-counter demo: values are printed from 0 until the loop breaks at 5.
count_5()
# output:
# 0
# 1
# 2
# 3
# 4
# 5
# Run the stepped-counter demo: starting at 5 with step 5, five values are printed.
five_multiples_5()
# output:
# 5
# 10
# 15
# 20
# 25

cycle()

  • The cycle() function accepts an iterable and returns an infinite iterator that repeats the iterable's elements in order, over and over.
from itertools import cycle

seq = [1, 2, 3]
iterator = cycle(seq)

# cycle() never stops by itself, so number the printed values and
# leave the loop after two full passes over seq (6 values).
for c, num in enumerate(iterator, start=1):
    print(num)
    if c == 6:
        break
# output:
# 1
# 2
# 3
# 1
# 2
# 3

repeat()

  • It repeats the given element n times if n is given; otherwise it repeats the element endlessly.
  • repeat returns an iterator

    syntax

    repeat(element, n_times)
    

  • let's look at an example

from itertools import repeat

# Ask for the value 2 exactly ten times; without `times` it would never end.
repeater = repeat(2, times=10)
twos = list(repeater)
print(twos)
# output: [2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
  • let's generate the square numbers using itertools.repeat
from itertools import repeat

# map() pairs each base with an exponent of 2 and stops at the shorter
# input, so the nine bases decide how many squares are produced.
bases = range(1, 10)
exponents = repeat(2, 10)
squares = map(pow, bases, exponents)
print(list(squares))
# output: [1, 4, 9, 16, 25, 36, 49, 64, 81]

Itertools functions

accumulate()

  • It accumulates the given iterable with operator.add or any other function if provided.
from itertools import accumulate

values = [1, 2, 3, 4, 5]

# With no function supplied, accumulate() yields running sums.
sums = accumulate(values)
print(list(sums))
# output: [1, 3, 6, 10, 15]

# A two-argument function replaces addition; here it yields running products.
muls = accumulate(values, lambda running, item: running * item)
print(list(muls))
# output: [1, 2, 6, 24, 120]

chain()

  • itertools.chain is a generator function which accepts iterables as arguments.
  • The function starts by iteratively returning each element from the first argument until it is exhausted, then moves on to the next argument, and so on until all of the arguments are exhausted.
from itertools import chain

alphabets = list('abcde')
numbers = list(range(1, 11))

# chain() walks through alphabets first and then continues with numbers.
iterator = chain(alphabets, numbers)
combined = list(iterator)
print(combined)
# output: ['a', 'b', 'c', 'd', 'e', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

chain.from_iterable()

  • it's an alternate constructor for itertools.chain.
  • Gets chained inputs from a single iterable argument that is evaluated lazily
  • let's look at examples
from itertools import chain

# One outer iterable holds the inputs; the string 'ABC' is itself iterable,
# so it contributes its individual characters.
nested = [[1, 2], [3, 4], 'ABC']
iterator = chain.from_iterable(nested)
flattened = list(iterator)
print(flattened)
# output: [1, 2, 3, 4, 'A', 'B', 'C']

compress()

  • Make an iterator that filters elements from data returning only those that have a corresponding element in selectors that evaluates to True.
  • Stops when either the data or the selectors iterable has been exhausted.
from itertools import compress

letters = 'ABCDEFGHIJ'

# Only three selectors are given, so only the first three letters are examined.
short_selectors = [True, False, True]
iterator = compress(letters, short_selectors)
print(list(iterator))
# output: ['A', 'C']

# Any truthy/falsy values work as selectors; this pattern keeps every second letter.
alternating_selectors = [0, 1] * 5
iterator = compress(letters, alternating_selectors)
print(list(iterator))
# output: ['B', 'D', 'F', 'H', 'J']

dropwhile()

  • Make an iterator that drops elements from the iterable as long as the predicate is true; afterwards, it returns every remaining element.
from itertools import dropwhile

numbers = [2, 4, 6, 8, 9, 10, 11, 12]


def func(x):
    """Return True when x is even."""
    return x % 2 == 0


# The leading even values are dropped; from the first odd value (9) onward
# everything is kept, including the later even numbers.
iterator = dropwhile(func, numbers)
print(list(iterator))
# output: [9, 10, 11, 12]

filterfalse()

  • Make an iterator that filters elements from iterable returning only those for which the predicate is False.
from itertools import filterfalse

numbers = [2, 4, 6, 8, 9, 10, 11, 12]


def func(x):
    """Return True when x leaves no remainder on division by 2."""
    return x % 2 == 0


# filterfalse() keeps the values the predicate rejects, i.e. the odd ones.
iterator = filterfalse(func, numbers)
odd_numbers = list(iterator)
print(odd_numbers)
# output: [9, 11]

groupby()

  • Make an iterator that returns consecutive keys and groups from the iterable.
  • The key is a function computing a key value for each element.
from itertools import groupby

data = [
    dict(name='satyajit', address='btm', pin=560076),
    dict(name='Preetam', address='btm', pin=560076),
    dict(name='Mukul', address='Silk board', pin=560078),
    dict(name='John', address='Gold board', pin=560076),
]


def pin_of(record):
    """Return the pin stored in a record; used as the grouping key."""
    return record['pin']


# groupby() only merges neighbouring records with the same key, which is
# why pin 560076 shows up as two separate groups in the output below.
iterator = groupby(data, pin_of)
for pin, residents in iterator:
    print(pin, list(residents))
# output: 
# 560076 [{'name': 'satyajit', 'address': 'btm', 'pin': 560076}, {'name': 'Preetam', 'address': 'btm', 'pin': 560076}]
# 560078 [{'name': 'Mukul', 'address': 'Silk board', 'pin': 560078}]
# 560076 [{'name': 'John', 'address': 'Gold board', 'pin': 560076}]

islice()

  • Make an iterator that returns selected elements from the iterable.
  • It works like normal Python slicing (start, stop, step) except that it returns an iterator and does not support negative values for start, stop, or step.
# islice is provided by itertools; importing it from collections raises ImportError.
from itertools import islice

# start=0, stop=8, step=2 selects the characters at indices 0, 2, 4 and 6.
iterator = islice("hello python", 0, 8, 2)
print(list(iterator))
# output: ['h', 'l', 'o', 'p']

pairwise()

  • Return successive overlapping pairs taken from the input iterable.
from itertools import pairwise

print(list(pairwise('ABCD')))
# output: [('A', 'B'), ('B', 'C'), ('C', 'D')]
print(list(pairwise([1,2,3, 4])))
# output: [(1, 2), (2, 3), (3, 4)]

starmap()

  • Make an iterator that computes the function using arguments obtained from the iterable.
from itertools import starmap

# Every tuple is unpacked into the arguments of pow(): pow(2, 4), pow(3, 4), pow(4, 4).
base_exponent_pairs = [(base, 4) for base in (2, 3, 4)]
fourth_powers = starmap(pow, base_exponent_pairs)
print(list(fourth_powers))
# output: [16, 81, 256]

takewhile()

  • Make an iterator that returns elements from the iterable as long as the predicate is true.
from itertools import takewhile

def even(num):
    """Return True when ``num`` is divisible by 2 without a remainder."""
    remainder = num % 2
    return remainder == 0


# Values are taken until the first odd number (11); the even numbers after it
# are not returned.
leading_evens = takewhile(even, [2, 4, 6, 8, 10, 11, 12, 14])
print(list(leading_evens))
# output: [2, 4, 6, 8, 10]

tee()

  • Return n independent iterators from a single iterable.
from itertools import tee

items = [1, 2, 3, 4]
iterable = iter(items)

# Split the single iterator into three that can each be consumed separately.
iterable_list = tee(iterable, 3)

for branch in iterable_list:
    print(list(branch))
# output:
# [1, 2, 3, 4]
# [1, 2, 3, 4]
# [1, 2, 3, 4]

zip_longest()

  • Make an iterator that aggregates elements from each of the iterables.
  • If the iterables are of uneven length, missing values are filled-in with fillvalue.
  • Iteration continues until the longest iterable is exhausted.
from itertools import zip_longest

students = ["Bob", "Ann", "John", "Marry", "Daisy", "Amy"]
grades = ["A", "A+", "D"]

# There are fewer grades than students, so the missing grades are shown as "-".
report = zip_longest(students, grades, fillvalue="-")
print(list(report))
# output: [('Bob', 'A'), ('Ann', 'A+'), ('John', 'D'), ('Marry', '-'), ('Daisy', '-'), ('Amy', '-')]

Combinatoric iterators

product()

  • Cartesian product of input iterables.
from itertools import product

s1 = 'AB'
s2 = 'DE'

# Every character of s1 is paired with every character of s2.
pairs = product(s1, s2)
print(list(pairs))
# output: [('A', 'D'), ('A', 'E'), ('B', 'D'), ('B', 'E')]

permutations()

  • Return successive r length permutations of elements in the iterable.
import itertools

# With no length given, each permutation uses all three elements.
orderings = itertools.permutations([1, 2, 3])
print(list(orderings))
# output: [(1, 2, 3), (1, 3, 2), (2, 1, 3), (2, 3, 1), (3, 1, 2), (3, 2, 1)]

combinations()

  • Return r length subsequences of elements from the input iterable.
from itertools import combinations

numbers = [1, 2, 3]

# Show the subsequences of length 2 first, then of length 3.
for size in (2, 3):
    print(list(combinations(numbers, r=size)))
# output:
# [(1, 2), (1, 3), (2, 3)]
# [(1, 2, 3)]

combinations_with_replacement()

  • Return r length subsequences of elements from the input iterable allowing individual elements to be repeated more than once.
from itertools import combinations_with_replacement

numbers = [1, 2, 3]

# Same lengths as before (2, then 3), but an element may now appear more than once.
for size in (2, 3):
    print(list(combinations_with_replacement(numbers, r=size)))
# output:
# [(1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)]
# [(1, 1, 1), (1, 1, 2), (1, 1, 3), (1, 2, 2), (1, 2, 3), (1, 3, 3), (2, 2, 2), (2, 2, 3), (2, 3, 3), (3, 3, 3)]

References: