# Untitled

# Examples from chapter 1 of the book.
import heapq
import json
import os
from operator import itemgetter, attrgetter
from collections import deque, defaultdict, OrderedDict, Counter, namedtuple
from itertools import groupby, compress

# Unpacking a Sequence into Separate Variables
# Any sequence (or iterable) can be unpacked into variables using a simple
# assignment; the number of targets must match the number of items.
x, y = (4, 5)
print(x)
print(y)

data = ["ACME", 2, 51.2, (2016, 7, 2)]
name, shares, price, date = data
print(name, shares, price)
print(date)

# Nested targets unpack a nested sequence in the same statement.
names, shares, price, (year, month, day) = data
# Print the values bound just above (previously this printed the stale
# ``name`` from the earlier unpacking instead of ``names``).
print(names, shares, price)
print(year)
print(month)
print(day)

# "_" serves as a throwaway target for values that are not needed.
names, shares, _, (year, month, _) = data
print(names, shares, year, month)

# Unpacking works with any iterable object, not just tuples or lists.
# This includes strings, files, iterators, and generators.
s = "hello"
a, b, c, d, e = s
print(a, c, e)


# Unpacking Elements from Iterables of Arbitrary Length
def drop_fist_last(grades):
    """Return the mean of ``grades`` with the first and last items dropped.

    Star-unpacking gathers everything between the two end items into a
    list, which is then averaged.

    Args:
        grades: an iterable of numbers.

    Returns:
        The arithmetic mean of the middle items.

    Raises:
        ValueError: if ``grades`` has fewer than two items (unpacking fails).
        ZeroDivisionError: if ``grades`` has exactly two items, because no
            middle values are left to average.
    """
    _first, *middle, _last = grades
    return sum(middle) / len(middle)


# Print the result; as a bare expression the call had no visible effect.
print(drop_fist_last([1, 2, 3, 4, 5, 6, 7, 8]))

# A starred target absorbs however many items the plain targets leave over.
record = [
    "Dave",
    "Dave@exa,ple.com",
    "721-423-2341",
    "871-562-3427",
]
name, email, *phone_numbers = record

# phone_numbers is always a list, regardless of how many phone numbers
# are unpacked.
print(phone_numbers)

# The starred target may also come first, collecting everything but the tail.
*trailing, current = list(range(1, 9))
print(trailing)
print(current)

# Star syntax is especially useful when iterating over a sequence of tuples
# of varying length, such as these tagged records.
record = [
    ("foo", 2, 3),
    ("bar", "hello"),
    ("foo", 4, 7),
]


def do_foo(x_do, y_do):
    """Print the literal tag "foo" followed by the two given values."""
    fields = ("foo", x_do, y_do)
    print(*fields)


def do_bar(s_do):
    """Print the literal tag "bar" followed by the given value."""
    fields = ("bar", s_do)
    print(*fields)


# Dispatch each record on its leading tag; the starred target collects the
# remaining fields, which are then spread into the handler's parameters.
for tag, *args in record:
    if tag == "bar":
        do_bar(*args)
    elif tag == "foo":
        do_foo(*args)

# Star unpacking combined with string splitting: keep the first field and the
# last two, and gather everything in between into ``fields``.
line_read = "nobody:*:2:2:Unprivileged User:var/empty:/ermia/home/bin"
username, *fields, home_dir, sh = line_read.split(sep=":")
print(username, home_dir, sh)

# Starred targets may also be used purely to skip values, including inside a
# nested target.
data = ["ACME", 2, 51.2, (2016, 7, 2)]
journal_name, *ign1, (year, *ign2) = data
print(journal_name, year)

"""Star unpacking can also be used to write functions that split a sequence into a head and a tail in order to carry out some kind of clever recursive algorithm."""


def sum1(items):
    """Recursively add up ``items`` by splitting off the head on each call.

    Each call unpacks ``items`` into its first element and a list of the
    rest, then adds the head to the sum of the tail. The recursion goes one
    level deeper per item, so it is a demonstration of star unpacking
    rather than a practical replacement for ``sum()``.

    Args:
        items: a non-empty iterable whose elements support ``+``.

    Returns:
        The combined value; a single-item input returns that item unchanged.

    Raises:
        ValueError: if ``items`` is empty (there is no head to unpack).
    """
    head, *tail = items
    if not tail:
        return head
    return head + sum1(tail)


# Print the result; as a bare expression the call had no visible effect.
print(sum1([1, 2, 3, 4, 5]))

# Keeping the Last N Items
"""Using deque(maxlen=N) creates a fixed-sized queue.
When new items are added and the queue is full, the oldest item is automatically removed."""


def search(lines, pattern, history=5):
    """Yield every line containing ``pattern`` along with the lines before it.

    Args:
        lines: an iterable of lines; each is tested with ``pattern in line``.
        pattern: the value to look for in each line.
        history: the maximum number of preceding lines to remember.

    Yields:
        ``(line, previous_lines)`` pairs, where ``previous_lines`` is a deque
        holding at most ``history`` of the lines seen before ``line``. The
        same deque object is yielded every time and keeps being appended to,
        so read or copy it before advancing the generator.
    """
    recent = deque(maxlen=history)
    for current in lines:
        if pattern in current:
            yield current, recent
        # Remember the line only after the check so a match never appears
        # in its own context.
        recent.append(current)


# Print every line of foo.txt that mentions "python", preceded by up to five
# lines of context and followed by a separator.
try:
    with open("foo.txt") as file:
        for line, previous_line in search(file, "python", 5):
            for p_line in previous_line:
                # Lines read from a file keep their own newline.
                print(p_line, end="")
            # Show the matching line itself, then close the group with a
            # single separator (previously the separator was printed after
            # every context line and the match was never shown).
            print(line, end="")
            print("_" * 20)
except FileNotFoundError:
    # The sample file is optional; without it, skip this demo instead of
    # aborting the rest of the script.
    print("foo.txt not found; skipping the search demo")

# A bounded deque holding at most three items.
q = deque(maxlen=3)
q.extend([1, 2, 3])
print(q)

# Appending to a full deque pushes the oldest item out at the other end.
q.append(4)
print(q)

# Finding the Largest or Smallest N Items
# The heapq module has two functions for this: nlargest() and nsmallest().
nums = [12, 15, 3, 56, 43, 23, 87, 99, 57]
print(heapq.nlargest(5, nums))
print(heapq.nsmallest(6, nums))

# Both functions also accept a key parameter that allows them to be used
# with more complicated data structures.
portfolio = [
    {"item_name": "IBM", "shares": 100, "price": 91.1},
    {"item_name": "APPLE", "shares": 50, "price": 543.22},
    {"item_name": "FB", "shares": 200, "price": 21.09},
    {"item_name": "HPQ", "shares": 35, "price": 31.75},
    {"item_name": "YAHOO", "shares": 45, "price": 16.35},
    {"item_name": "ACME", "shares": 75, "price": 115.65},
]
print(heapq.nsmallest(2, portfolio, key=itemgetter("price")))
print(heapq.nlargest(3, portfolio, key=itemgetter("price")))

# Mapping Keys to Multiple Values in a Dictionary
# To map keys to multiple values, store the values in another container
# such as a list or a set.
multi_dict1 = {"a": [1, 2, 3], "b": [4, 5]}
# Use a set if you want to eliminate duplicates (and don't care about order).
multi_dict2 = {"a": {1, 2, 3}, "b": {4, 5}}

# defaultdict from the collections module builds such dictionaries easily:
# a missing key gets a fresh empty list on first access.
multi_dict3 = defaultdict(list)
multi_dict3["a"].extend((1, 3))
multi_dict3["b"].extend((2, 4, 5))
print(multi_dict3)

# Keeping Dictionaries in Order
# To control the order of items in a dictionary, you can use an OrderedDict
# from the collections module.
ordered_dict = OrderedDict([(1, "foo"), (2, "bar"), (3, "spam")])

for key in ordered_dict.keys():
    print(key, ordered_dict[key])

# To precisely control the order of fields appearing in a JSON encoding,
# first build the data in an OrderedDict.
print(json.dumps(ordered_dict))

# Calculating with Dictionaries
prices = {"ACME": 45.23, "APPLE": 46.25, "YAHOO": 27.79, "GOOGLE": 54.23}

# Flip each entry to (price, ticker) so that comparisons rank by price first.
print(max((price, ticker) for ticker, price in prices.items()))
print(min((price, ticker) for ticker, price in prices.items()))
print(sorted((price, ticker) for ticker, price in prices.items()))

# Finding Commonalities in Two Dictionaries
dict1 = {"x": 1, "y": 2, "z": 3}
dict2 = {"w": 11, "x": 4, "y": 2}

# Key and item views support set operations. The results are named and
# printed here; as bare expressions they were computed and thrown away.
common_keys = dict1.keys() & dict2.keys()
print(common_keys)

only_in_dict1 = dict1.keys() - dict2.keys()
print(only_in_dict1)

common_items = dict1.items() & dict2.items()
print(common_items)

# Build a new dictionary with selected keys removed.
dict3 = {key: dict1[key] for key in dict1.keys() - {'z', 'w'}}
print(dict3)

# Naming a Slice
# The built-in slice() creates a slice object that can be used anywhere a
# slice is allowed.
item = list(range(7))
a = slice(2, 4)

# The literal slice and the named slice select the same elements.
print(item[2:4])
print(item[a])

# A slice object also works as an assignment target ...
item[a] = [10, 11]
print(item)

# ... and as a deletion target.
del item[a]
print(item)

# Determining the Most Frequently Occurring Items in a Sequence
words = (
    "look into my eyes look into my eyes "
    "the eyes the eyes the eyes not around the "
    "eyes don't look around the eyes look into "
    "my eyes you're under"
).split()

words_count = Counter(words)
top_three = words_count.most_common(3)
print(top_three)

# Counter objects can be fed any sequence of hashable items and are then
# indexed like a dictionary of counts.
print(words_count['not'])
print(words_count["eyes"])

# To increment the count manually, simply use addition:
more_words = "why are you not looking in my eyes".split()
for word in more_words:
    words_count[word] += 1

print(words_count["eyes"])

# Or, alternatively, use the update() method (this counts more_words a
# second time on top of the loop above):
words_count.update(more_words)
print(words_count['eyes'])

# A little-known feature of Counter instances is that they can be combined
# using mathematical operations. Start again from fresh counts.
words_count = Counter(words)
print(words_count)

words_count_more = Counter(more_words)
print(words_count_more)

print(words_count + words_count_more)
print(words_count - words_count_more)

# Sorting a List of Dictionaries by a Common Key
rows = [
    dict(fname="Brian", lname="Jones", uid=1003),
    dict(fname="David", lname="Beazley", uid=1002),
    dict(fname="John", lname="Cleese", uid=1001),
    dict(fname="Big", lname="Jones", uid=1004),
]

rows_by_name = sorted(rows, key=itemgetter("fname"))
print(rows_by_name)

rows_by_id = sorted(rows, key=itemgetter("uid"))
print(rows_by_id)

# itemgetter() can also accept multiple keys and then returns a tuple.
# NOTE: despite the variable name, this sorts by fname and then lname.
rows_by_name_and_id = sorted(rows, key=itemgetter("fname", "lname"))
print(rows_by_name_and_id)

# The functionality of itemgetter() is sometimes replaced by a lambda.
rows_by_name = sorted(rows, key=lambda row: row["fname"])
print(rows_by_name)

# The same key technique applies to functions such as min() and max().
min_rows_by_uid = min(rows, key=lambda row: row["uid"])
print(min_rows_by_uid)


# Sorting Objects Without Native Comparison Support
class User:
    """Minimal record that stores a ``user_id`` and defines no comparison methods."""

    def __init__(self, user_id):
        # Stored as given; no validation or conversion is applied.
        self.user_id = user_id

    def __repr__(self):
        return "User(" + format(self.user_id) + ")"


users = [User(24), User(45), User(32)]
# User defines no ordering of its own, so supply a key that extracts a
# comparable value from each instance.
print(sorted(users, key=lambda u: u.user_id))

# Instead of using lambda, an alternative approach is operator.attrgetter():
print(sorted(users, key=attrgetter("user_id")))

# The same technique applies to functions such as min() and max(). Print
# the result; as a bare expression the call had no visible effect.
print(max(users, key=attrgetter("user_id")))

# Grouping Records Together Based on a Field
# The itertools.groupby() function is particularly useful for grouping data
# together like this.
rows = [
    dict(address="5412 N CLARK", date="07/01/2012"),
    dict(address="5148 N CLARK", date="07/04/2012"),
    dict(address="5800 E 58TH", date="07/02/2012"),
    dict(address="2122 N CLARK", date="07/03/2012"),
    dict(address="5645 N RAVENSWOOD", date="07/02/2012"),
    dict(address="1060 W ADDISON", date="07/02/2012"),
    dict(address="4801 N BROADWAY", date="07/01/2012"),
    dict(address="1039 W GRANVILLE", date="07/04/2012"),
]

# groupby() only merges *consecutive* items with equal keys, so the rows
# are first sorted in place by the grouping field.
rows.sort(key=lambda row: row["date"])

# Walk the groups: one heading per date, then that date's rows indented.
for date, items in groupby(rows, key=lambda row: row["date"]):
    print(date)
    for i in items:
        print(" ", i)

# Filtering Sequence Elements
# The easiest way to filter sequence data is often a list comprehension.
my_list = [-4, -5, 0, 3, -2, 17, -43, 23]
my_list_positive = [value for value in my_list if value > 0]
# Clip instead of dropping: every non-positive value becomes 0.
my_list_clip = [max(value, 0) for value in my_list]

print(my_list_positive)
print(my_list_clip)

# A generator expression produces the filtered values iteratively instead
# of building the whole list up front.
my_list_positive = (value for value in my_list if value > 0)
for item in my_list_positive:
    print(item)

"""In more complicated cases, put the filtering code into its own function and use the built-in filter() function."""


def is_int(value):
    """Return True if ``int(value)`` succeeds, False otherwise.

    Besides unparsable strings (ValueError), this also rejects values that
    ``int()`` cannot accept at all, such as ``None`` or a list (TypeError),
    and infinite floats (OverflowError), so the predicate never raises when
    used with ``filter()`` on mixed data.

    Args:
        value: any object.

    Returns:
        bool: whether ``value`` can be converted with ``int()``.
    """
    try:
        int(value)
    except (ValueError, TypeError, OverflowError):
        return False
    return True


# filter() creates an iterator, so wrap it in list() to get a list of results.
int_values = list(filter(is_int, ["1", "2", "NA", "4", "-"]))
print(int_values)

"""Another filtering tool is itertools.compress(), which takes an iterable and an accompanying Boolean selector sequence as input."""
addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    # The comma after the next entry was missing, which silently fused it
    # with the following address and left 7 addresses for 8 counts.
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]

# counts[i] belongs to addresses[i]; compress() pairs items with selectors
# by position, so the two lists must line up.
counts = [0, 3, 10, 4, 1, 7, 6, 1]
more_five = [n > 5 for n in counts]
print(more_five)
# Print the selected addresses; as a bare expression the result was discarded.
print(list(compress(addresses, more_five)))

# Extracting a Subset of a Dictionary
# This is easily accomplished using a dictionary comprehension.

prices = {"APPLE": 42.5, "GOOGLE": 54.2, "YAHOO": 48.73}

# Keep only the entries whose price exceeds 45.
prices1 = {ticker: cost for ticker, cost in prices.items() if cost > 45}
# or: prices1 = dict((key, value) for key, value in prices.items() if value > 45)
print(prices1)

# Keep only the entries whose name appears in a set of wanted names.
tech_names = {"APPLE", "FACEBOOK", "GOOGLE"}
prices2 = {ticker: cost for ticker, cost in prices.items() if ticker in tech_names}
print(prices2)

# Mapping Names to Sequence Elements
# collections.namedtuple() lets tuple elements be accessed by name while
# adding minimal overhead over using a normal tuple object.

subscriber = namedtuple("subscriber", ["address", "joined"])
sub = subscriber("jon@example.edu", "2010-7-18")
print(sub)
print(sub.address, sub.joined)

# A namedtuple is interchangeable with a tuple and supports all of the usual
# tuple operations such as len(), indexing and unpacking.
# Print the length; as a bare expression the result was discarded.
print(len(sub))
address1, joined1 = sub
print(address1, joined1)

# Transforming and Reducing Data at the Same Time
# A very elegant way to combine a data reduction and a transformation is to
# use a generator-expression argument.
nums = [1, 2, 3, 4, 5]
print(sum(x ** 2 for x in nums))

# Determine if any .py files exist in a directory
project_dir = "/home/ermia/PycharmProjects/Python Cookbook"
try:
    files = os.listdir(project_dir)
except OSError as error:
    # The path is specific to one machine; report the problem and carry on
    # rather than aborting the rest of the script.
    files = []
    print("cannot list {!r}: {}".format(project_dir, error))
else:
    if any(name.endswith(".py") for name in files):
        print("there is a python file")
    else:
        print("there is no python file")

# Output a tuple as CSV
s = ("ACME", 542.654, 72.3)
print(",".join(str(x) for x in s))

# # Data reduction across fields of a data structure
portfolio = [
    dict(item_name="GOOGLE", shares=50),
    dict(item_name="YaHOO", shares=75),
    dict(item_name="AOL", shares=20),
    dict(item_name="FACEBOOK", shares=65),
]
# Reduce over the extracted field: yields the smallest share count itself.
min_shares = min(holding["shares"] for holding in portfolio)
print(min_shares)

# Certain reduction functions such as min() and max() accept a key argument,
# which is useful where you might otherwise reach for a generator. With a
# key, the whole record holding the smallest share count is returned.
min_shares = min(portfolio, key=itemgetter("shares"))
print(min_shares)