Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -----BEGIN PGP SIGNED MESSAGE-----
- Hash: SHA1
#!/usr/bin/env python3
#
# So, I've got some free Python training for you! This code works with
# both python2 and python3. I love python3's print function, so let's
# add it...
from __future__ import print_function

# Ok. First thing, did you know about namedtuple? It works as a regular
# tuple, but you can access the fields by name (hence "named" tuple).
# Besides, it is optimized for producing a large amount of instances.
from collections import namedtuple

# It works as a meta-type: you define types by calling a function.
Row = namedtuple("Row", "name age city")

# ...but refer to the documentation (pydoc collections.namedtuple) for
# that. Here is a proof of how it works:
r = Row("Luke", 40, "Amsterdam")
print("Showing Row:", r)
print("Showing fields of the row:", r.name, r.age, r.city)

# Let's take a hypothetical list of results. This could be the output of
# some stats file, or maybe come from a database (even if the database
# will probably have some inner type!).
records = [
    Row("Jane", 15, "London"),
    Row("Georg", 15, "Berlin"),
    Row("Urlika", 20, "Stockholm"),
    Row("Johan", 17, "Stockholm"),
    Row("Aldo", 9, "Rome"),
    Row("Hans", 17, "Berlin"),
    Row("Petra", 21, "Berlin"),
]

# About itertools? It's an extremely nice library of Python, with useful
# lazy-evaluated functions [ https://en.wikipedia.org/wiki/Lazy_evaluation ].
# Iterators in general allow you to iterate (hence the word) on objects
# like tuples or lists ("iterables").
import itertools as it

# Iterators are very elegant, and if wisely used they can reduce the
# memory footprint of a program, but on the minus side they get consumed:
# you cannot use the same iterator twice. Example:

# I can print lists twice:
print('-' * 80)
print("Records")
for r in records:
    print("\t", r)
print("...second shot:")
for r in records:
    print("\t", r)
print("end.")
print('-' * 80)

# But iterators get consumed
records_iterator = iter(records)
print("Iterated records")
for r in records_iterator:
    print("\t", r)
print("...second shot (will be empty):")
for r in records_iterator:
    print("\t", r)
print("end.")
print('-' * 80)

# Also I'll include the "attrgetter" operator. Quoting the documentation:
#
#     After f = attrgetter('name'), the call f(r) returns r.name.
from operator import attrgetter as aget

# For example, this is get_city:
get_city = aget('city')

# Since the records are namedtuples with a field called `city`,
# `aget('city')` will produce a function returning the `city` field:
r = Row("Luke", 40, "Amsterdam")
print("The city of Luke is:", get_city(r))

# Now some interesting use of the groupby function.
#
# It basically works as the "Group By" operator in SQL: it groups together
# items of a table (in this case an iterator), and allows iteration over
# single groups. The only caveat: items of the same group must be
# contiguous, as for the "uniq" command of unix, which is often preceded
# by the "sort" command in a pipeline.
#
# For instance, let's take the `records` list we defined previously, and
# group the rows by the `city` attribute. The `get_city` we defined
# before turns out to be a good grouping function. So, those items have
# to be sorted by the relevant field...
records_sorted = sorted(records, key=get_city)

# ...Then we can use the groupby:
for city, rows in it.groupby(records_sorted, get_city):
    print("City:", city, end="\n\t")
    print(*rows, sep="\n\t", end="\n\n")

# Nice thing: this allows us to do some aggregation (e.g. sum, or average)
# quite easily. Want to know the average age by city?


def average_age(rows):
    """Return the mean of the ``age`` attribute over *rows*.

    *rows* may be any iterable (including a one-shot groupby group); it is
    consumed exactly once. Raises ZeroDivisionError if *rows* is empty.
    """
    # A groupby group is an iterator, so materialize the ages before
    # using them for both the sum and the count.
    ages = list(map(aget('age'), rows))
    # float() keeps true division under python2 as well.
    return float(sum(ages)) / len(ages)


for city, rows in it.groupby(records_sorted, get_city):
    print(
        "City:", city,
        "Avg.age:", average_age(rows),
        end="\n\n"
    )

# Note: sorted() will return *a list*, not an iterator. That's why we can
# use the it.groupby() function twice on it. If `records_sorted` had been
# defined as
#
#     records_sorted = iter(sorted(records, key=get_city))
#
# then the second loop (the one computing the averages) would not have
# worked. Also you may try to cycle with it.groupby on `records` instead
# of `records_sorted`: you will see the results! :)

# Happy hacking.
- -----BEGIN PGP SIGNATURE-----
- Version: GnuPG v1
- iQIcBAEBAgAGBQJS3aTyAAoJEC+Zq7a6FN+eueYQALImcxP1jJaRrrxtaLqjAZqf
- weYjcOq634NuMg2LFdRUV7KFc0GuW6AClGokXSle9F7hGsJZaxaer1VknkN5vmnw
- fzzs6soA+FN3fopjTmVxLXJvxrQNP+gSk4iuFF09KNU/E+UZfkKx5vpA+xg1wvr6
- R3Bi+jy8mDsz7zHkrv0sDdyIJF9ty6VZe3/afkM3TaaQN7/CRptpkGTJnA9bvkyJ
- eA9p5NyRD1eO9e6GScUeJ/Btn9sJ1kzfkYxdsLr4cRFSsRf3X0IymgUNOOpAAyH4
- +gvyeUw8iowukJD3B0+YTQgfs7A9LUSZNHd+ElmC+eOJxAgmQwuhsxAvXN0I2seI
- +CuNKmf+zF4TGY3bnQHjE2XBk4Q42m7N+gsN1wsd7t4gK/q5jK8Mf8eh8qvXE5tt
- H8yLPc6y9kjglFLFzBtrskDKrKZ9M/rRPUJZJJOZ+rrkEAtOPq4JINKfhZL3tOsI
- rYPKlBhbl8uEuy6w1lChsEn+yUE+Tct5XL/jF+hkLzlkWLCO9FB15Ta6XHxK+xff
- sssbeFOcarFra7ssKK1Hou8LPeoqkUy5AC2ywnp3oOMU3dk6+qXrG0+ROGp5IxXr
- 0JiEGiykUmXqdqLAeSAwm8H2Ac7U9+8LT5g1FUTpMsvQezfburyUjmDVWLiaKcNE
- z6UCQu37bHVaRM0RmLqX
- =+CMW
- -----END PGP SIGNATURE-----
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement