Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# This is chapter 1 of the book.
import heapq
import json
import os
from collections import Counter, OrderedDict, defaultdict, deque, namedtuple
from itertools import compress, groupby
from operator import attrgetter, itemgetter
# Unpacking a Sequence into Separate Variables
# Any sequence (or iterable) can be unpacked into variables using a simple
# assignment operation.
x, y = (4, 5)
print(x)
print(y)

data = ["ACME", 2, 51.2, (2016, 7, 2)]
name, shares, price, date = data
print(name, shares, price)
print(date)

# A nested target unpacks the inner date tuple in the same statement.
name, shares, price, (year, month, day) = data
print(name, shares, price)
print(year)
print(month)
print(day)

# "_" is used as a throwaway name for the values that are not needed.
name, shares, _, (year, month, _) = data
print(name, shares, year, month)

# Unpacking works with any object that happens to be iterable, not just
# tuples or lists. This includes strings, files, iterators, and generators.
s = "hello"
a, b, c, d, e = s
print(a, c, e)
# Unpacking Elements from Iterables of Arbitrary Length
def drop_fist_last(grades):
    """Return the mean of ``grades`` with the first and last items dropped.

    Args:
        grades: an iterable of numbers.

    Returns:
        ``sum(middle) / len(middle)`` where ``middle`` is everything between
        the first and the last item.

    Raises:
        ValueError: if ``grades`` has fewer than two items (unpacking fails).
        ZeroDivisionError: if ``grades`` has exactly two items, because
            nothing is left between them to average.
    """
    # The first and last grades are deliberately discarded.
    _, *middle, _ = grades
    return sum(middle) / len(middle)


# The return value is not used here; the call only exercises the function.
drop_fist_last([1, 2, 3, 4, 5, 6, 7, 8])
record = ["Dave", "Dave@exa,ple.com", "721-423-2341", "871-562-3427"]
name, email, *phone_numbers = record
# The starred target is always a list, regardless of how many phone numbers
# are unpacked.
print(phone_numbers)

# The starred target may also come first and collect everything but the last.
*trailing, current = [1, 2, 3, 4, 5, 6, 7, 8]
print(trailing)
print(current)

# The star syntax is especially useful when iterating over a sequence of
# tuples of varying length.
record = [("foo", 2, 3), ("bar", "hello"), ("foo", 4, 7)]
def do_foo(x_do, y_do):
    """Print the tag ``foo`` followed by the two given values."""
    print("foo", x_do, y_do)
def do_bar(s_do):
    """Print the tag ``bar`` followed by the given value."""
    print("bar", s_do)
# Split each record into its tag and the remaining fields, then hand the
# fields to the handler that matches the tag.
for tag, *args in record:
    if tag == "foo":
        do_foo(*args)
    elif tag == "bar":
        do_bar(*args)

# Star unpacking combined with str.split(): keep the first field and the
# last two, collect everything in between.
line_read = "nobody:*:2:2:Unprivileged User:var/empty:/ermia/home/bin"
username, *fields, home_dir, sh = line_read.split(":")
print(username, home_dir, sh)

# "*_" discards any number of values that are not needed.
data = ["ACME", 2, 51.2, (2016, 7, 2)]
journal_name, *_, (year, *_) = data
print(journal_name, year)

# Star unpacking also lends itself to writing functions that perform such
# splitting in order to carry out some kind of clever recursive algorithm.
def sum1(items):
    """Recursively add up ``items`` by splitting off the head each time.

    Args:
        items: a non-empty iterable whose elements support ``+``.

    Returns:
        ``head + sum1(tail)``; the single element itself when there is no
        tail.

    Raises:
        ValueError: if ``items`` is empty (there is no head to unpack).

    Note:
        Each level of recursion builds a new ``tail`` list, and the depth
        equals the number of items, so this is a demonstration of star
        unpacking rather than a replacement for the built-in ``sum()``.
    """
    head, *tail = items
    if not tail:
        return head
    return head + sum1(tail)


# The return value is not used here; the call only exercises the function.
sum1([1, 2, 3, 4, 5])
# Keeping the Last N Items
# Using deque(maxlen=N) creates a fixed-sized queue. When new items are added
# and the queue is full, the oldest item is automatically removed.
def search(lines, pattern, history=5):
    """Yield each line containing ``pattern`` with the lines that preceded it.

    Args:
        lines: an iterable of strings (for example an open text file).
        pattern: substring to look for in every line.
        history: maximum number of preceding lines to remember.

    Yields:
        ``(line, previous_lines)`` tuples, where ``previous_lines`` is a
        deque holding up to ``history`` lines seen before ``line``.
    """
    previous_lines = deque(maxlen=history)
    for line_to_read in lines:
        if pattern in line_to_read:
            # Yield a snapshot: the working deque keeps changing as the scan
            # continues, so handing it out directly would make every result
            # collected by the caller show the same, latest history.
            yield line_to_read, previous_lines.copy()
        # Every line, matching or not, becomes history for the next ones.
        previous_lines.append(line_to_read)
# Scan foo.txt and, for every line containing "python", print the lines that
# came before it followed by a separator.
# NOTE(review): the matching line itself (``line``) is never printed —
# confirm whether that is intended.
with open("foo.txt") as file:
    for line, previous_line in search(file, "python", 5):
        for p_line in previous_line:
            print(p_line, end=" ")
        print("_" * 20)
# A bounded deque keeps only the newest ``maxlen`` items.
q = deque(maxlen=3)
q.append(1)
q.append(2)
q.append(3)
print(q)
# The queue is full, so appending 4 drops the oldest item (1).
q.append(4)
print(q)
# Finding the Largest or Smallest N Items
# The heapq module has two functions for this: nlargest() and nsmallest().
nums = [12, 15, 3, 56, 43, 23, 87, 99, 57]
largest_nums = heapq.nlargest(5, nums)
smallest_nums = heapq.nsmallest(6, nums)
print(largest_nums)
print(smallest_nums)

# Both functions also accept a key parameter that allows them to be used
# with more complicated data structures.
portfolio = [
    {'item_name': 'IBM', 'shares': 100, 'price': 91.1},
    {'item_name': 'APPLE', 'shares': 50, 'price': 543.22},
    {'item_name': 'FB', 'shares': 200, 'price': 21.09},
    {'item_name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'item_name': 'YAHOO', 'shares': 45, 'price': 16.35},
    {'item_name': 'ACME', 'shares': 75, 'price': 115.65},
]
cheapest = heapq.nsmallest(2, portfolio, key=itemgetter("price"))
priciest = heapq.nlargest(3, portfolio, key=itemgetter("price"))
print(cheapest)
print(priciest)
# Mapping Keys to Multiple Values in a Dictionary
# If you want to map keys to multiple values, you need to store the multiple
# values in another container such as a list or set.
multi_dict1 = {
    "a": [1, 2, 3],
    "b": [4, 5],
}
# Use a set if you want to eliminate duplicates (and don't care about the
# order).
multi_dict2 = {
    "a": {1, 2, 3},
    "b": {4, 5},
}

# To easily construct such dictionaries, you can use defaultdict in the
# collections module: a missing key gets a fresh empty list on first access.
multi_dict3 = defaultdict(list)
multi_dict3["a"].append(1)
multi_dict3["b"].append(2)
multi_dict3["a"].append(3)
multi_dict3["b"].append(4)
multi_dict3["b"].append(5)
print(multi_dict3)
# Keeping Dictionaries in Order
# To control the order of items in a dictionary, you can use an OrderedDict
# from the collections module.
ordered_dict = OrderedDict()
ordered_dict[1] = "foo"
ordered_dict[2] = "bar"
ordered_dict[3] = "spam"
# Iteration follows insertion order.
for key, value in ordered_dict.items():
    print(key, value)

# If you want to precisely control the order of fields appearing in a JSON
# encoding, first building the data in an OrderedDict will do the trick.
print(json.dumps(ordered_dict))
# Calculating with Dictionaries
prices = {
    "ACME": 45.23,
    "APPLE": 46.25,
    "YAHOO": 27.79,
    "GOOGLE": 54.23,
}
# zip() pairs each price with its name as (value, key) tuples, so max(),
# min() and sorted() compare by price first.
print(max(zip(prices.values(), prices.keys())))
print(min(zip(prices.values(), prices.keys())))
print(sorted(zip(prices.values(), prices.keys())))
# Finding Commonalities in Two Dictionaries
dict1 = {"x": 1, "y": 2, "z": 3}
dict2 = {"w": 11, "x": 4, "y": 2}

# Key and item views support set operations directly.
common_keys = dict1.keys() & dict2.keys()      # keys present in both
only_in_dict1 = dict1.keys() - dict2.keys()    # keys in dict1 but not dict2
common_items = dict1.items() & dict2.items()   # identical (key, value) pairs

# Build a copy of dict1 with selected keys removed.
dict3 = {key: dict1[key] for key in dict1.keys() - {'z', 'w'}}
print(dict3)
# Naming a Slice
# In general, the built-in slice() creates a slice object that can be used
# anywhere a slice is allowed.
item = [0, 1, 2, 3, 4, 5, 6]
a = slice(2, 4)
# Indexing with the literal slice and with the slice object is equivalent.
print(item[2:4])
print(item[a])
# A slice object also works for assignment ...
item[a] = [10, 11]
print(item)
# ... and for deletion.
del item[a]
print(item)
# Determining the Most Frequently Occurring Items in a Sequence
words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
    'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
    'my', 'eyes', "you're", 'under',
]
words_count = Counter(words)
top_three = words_count.most_common(3)
print(top_three)

# As input, Counter objects can be fed any sequence of hashable input items.
print(words_count['not'])
print(words_count["eyes"])

# If you want to increment the count manually, simply use addition:
more_words = ['why', 'are', 'you', 'not', 'looking', 'in', 'my', 'eyes']
for word in more_words:
    words_count[word] += 1
print(words_count["eyes"])

# Or, alternatively, you could use the update() method:
words_count.update(more_words)
print(words_count['eyes'])

# A little-known feature of Counter instances is that they can be easily
# combined using various mathematical operations.
words_count = Counter(words)
print(words_count)
words_count_more = Counter(more_words)
print(words_count_more)
print(words_count + words_count_more)
print(words_count - words_count_more)
# Sorting a List of Dictionaries by a Common Key
rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
]
rows_by_name = sorted(rows, key=itemgetter("fname"))
print(rows_by_name)
rows_by_id = sorted(rows, key=itemgetter("uid"))
print(rows_by_id)

# The itemgetter() function can also accept multiple keys.
# NOTE(review): despite its name, this sorts by first name and then last
# name; "uid" is not involved.
rows_by_name_and_id = sorted(rows, key=itemgetter("fname", "lname"))
print(rows_by_name_and_id)

# The functionality of itemgetter() is sometimes replaced by lambda
# expressions.
rows_by_name = sorted(rows, key=lambda r: r["fname"])
print(rows_by_name)

# Last, but not least, don't forget that the technique shown in this recipe
# can be applied to functions such as min() and max().
min_rows_by_uid = min(rows, key=itemgetter("uid"))
print(min_rows_by_uid)
# Sorting Objects Without Native Comparison Support
class User:
    """A user identified only by ``user_id``; defines no ordering itself."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        return "User({})".format(self.user_id)


users = [User(24), User(45), User(32)]
# Instances cannot be compared directly, so a key function supplies the
# value to sort by.
print(sorted(users, key=lambda u: u.user_id))

# Instead of using lambda, an alternative approach is to use
# operator.attrgetter():
print(sorted(users, key=attrgetter("user_id")))

# It is also worth noting that the technique used in this recipe can be
# applied to functions such as min() and max().
max_user = max(users, key=attrgetter("user_id"))
# Grouping Records Together Based on a Field
# The itertools.groupby() function is particularly useful for grouping data
# together like this.
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]

# If you want to iterate over the data in chunks grouped by date, first sort
# by the desired field (in this case, date) and then use itertools.groupby().
# groupby() only groups consecutive items, hence the sort with the same key.
by_date = itemgetter("date")
rows.sort(key=by_date)

# Iterate over the groups.
for date, items in groupby(rows, key=by_date):
    print(date)
    for row in items:
        print(" ", row)
# Filtering Sequence Elements
# The easiest way to filter sequence data is often to use a list
# comprehension.
my_list = [-4, -5, 0, 3, -2, 17, -43, 23]
my_list_positive = [n for n in my_list if n > 0]
# A conditional expression replaces the rejected values instead of dropping
# them.
my_list_clip = [n if n > 0 else 0 for n in my_list]
print(my_list_positive)
print(my_list_clip)

# You can also use generator expressions to produce the filtered values
# iteratively.
my_list_positive = (n for n in my_list if n > 0)
for positive in my_list_positive:
    print(positive)

# In more complicated cases, put the filtering code into its own function and
# use the built-in filter() function.
def is_int(value):
    """Return True if ``int(value)`` succeeds, False otherwise.

    Values that ``int()`` rejects with ``ValueError`` (such as ``"NA"``) or
    with ``TypeError`` (such as ``None``) are reported as not convertible.
    """
    try:
        int(value)
    except (TypeError, ValueError):
        return False
    return True


# filter() creates an iterator, so if you want to create a list of results,
# make sure you also use list() as shown.
int_values = list(filter(is_int, ["1", "2", "NA", "4", "-"]))
print(int_values)

# Another filtering tool is itertools.compress(), which takes an iterable and
# an accompanying Boolean selector sequence as input.
# Keep only the addresses whose matching count is greater than five.
# addresses and counts are parallel sequences, so they must have the same
# number of entries.
addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]
counts = [0, 3, 10, 4, 1, 7, 6, 1]
# One Boolean per count; compress() keeps the items whose selector is true.
more_five = [n > 5 for n in counts]
print(more_five)
selected_addresses = list(compress(addresses, more_five))
# Extracting a Subset of a Dictionary
# This is easily accomplished using a dictionary comprehension.
prices = {"APPLE": 42.5, "GOOGLE": 54.2, "YAHOO": 48.73}

# Keep the entries whose price is above 45. The same result can be built
# with dict() over a generator of (key, value) pairs.
prices1 = {key: value for key, value in prices.items() if value > 45}
print(prices1)

# Keep the entries whose key is one of the tech names.
tech_names = {"APPLE", "FACEBOOK", "GOOGLE"}
prices2 = {key: value for key, value in prices.items() if key in tech_names}
print(prices2)
# Mapping Names to Sequence Elements
# collections.namedtuple() lets tuple fields be accessed by name, while
# adding minimal overhead over using a normal tuple object.
subscriber = namedtuple("subscriber", ["address", "joined"])
sub = subscriber("jon@example.edu", "2010-7-18")
print(sub)
print(sub.address, sub.joined)

# A namedtuple is interchangeable with a tuple and supports all of the usual
# tuple operations such as indexing and unpacking. For example:
field_count = len(sub)
address1, joined1 = sub
print(address1, joined1)
# Transforming and Reducing Data at the Same Time
# A very elegant way to combine a data reduction and a transformation is to
# use a generator-expression argument: no intermediate list is built.
nums = [1, 2, 3, 4, 5]
print(sum(x ** 2 for x in nums))
# Determine if any .py files exist in a directory
project_dir = "/home/ermia/PycharmProjects/Python Cookbook"
# The path only exists on one machine; a missing directory is treated as
# containing no files so the rest of the script can still run elsewhere.
try:
    files = os.listdir(project_dir)
except FileNotFoundError:
    files = []

# any() stops at the first matching name.
if any(name.endswith(".py") for name in files):
    print("there is a python file")
else:
    print("there is no python file")
# Output a tuple as CSV
s = ("ACME", 542.654, 72.3)
# str.join() needs strings, so every field is converted on the fly.
print(",".join(str(x) for x in s))
# Data reduction across fields of a data structure
portfolio = [
    {'item_name': 'GOOGLE', 'shares': 50},
    {'item_name': 'YaHOO', 'shares': 75},
    {'item_name': 'AOL', 'shares': 20},
    {'item_name': 'FACEBOOK', 'shares': 65},
]
# A generator expression yields only the smallest number of shares.
min_shares = min(s["shares"] for s in portfolio)
print(min_shares)

# Certain reduction functions such as min() and max() accept a key argument,
# which might be useful in situations where you might be inclined to use a
# generator. With a key, the whole record with the fewest shares is returned
# instead of just the number.
min_shares = min(portfolio, key=itemgetter("shares"))
print(min_shares)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement