pymen

Simple example of loading XML into MongoDB

Nov 8th, 2012
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.21 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3.  
  4. xml_data = """<?xml version="1.0"?>
  5. <catalog>
  6.   <book id="bk101">
  7.      <author>Gambardella, Matthew</author>
  8.      <title>XML Developer's Guide</title>
  9.      <genre>Computer</genre>
  10.      <price>44.95</price>
  11.      <publish_date>2000-10-01</publish_date>
  12.      <description>An in-depth look at creating applications
  13.      with XML.</description>
  14.   </book>
  15.   <book id="bk102">
  16.      <author>Ralls, Kim</author>
  17.      <title>Midnight Rain</title>
  18.      <genre>Fantasy</genre>
  19.      <price>5.95</price>
  20.      <publish_date>2000-12-16</publish_date>
  21.      <description>A former architect battles corporate zombies,
  22.      an evil sorceress, and her own childhood to become queen
  23.      of the world.</description>
  24.   </book>
  25.   <book id="bk103">
  26.      <author>Corets, Eva</author>
  27.      <title>Maeve Ascendant</title>
  28.      <genre>Fantasy</genre>
  29.      <price>5.95</price>
  30.      <publish_date>2000-11-17</publish_date>
  31.      <description>After the collapse of a nanotechnology
  32.      society in England, the young survivors lay the
  33.      foundation for a new society.</description>
  34.   </book>
  35.   <book id="bk104">
  36.      <author>Corets, Eva</author>
  37.      <title>Oberon's Legacy</title>
  38.      <genre>Fantasy</genre>
  39.      <price>5.95</price>
  40.      <publish_date>2001-03-10</publish_date>
  41.      <description>In post-apocalypse England, the mysterious
  42.      agent known only as Oberon helps to create a new life
  43.      for the inhabitants of London. Sequel to Maeve
  44.      Ascendant.</description>
  45.   </book>
  46.   <book id="bk105">
  47.      <author>Corets, Eva</author>
  48.      <title>The Sundered Grail</title>
  49.      <genre>Fantasy</genre>
  50.      <price>5.95</price>
  51.      <publish_date>2001-09-10</publish_date>
  52.      <description>The two daughters of Maeve, half-sisters,
  53.      battle one another for control of England. Sequel to
  54.      Oberon's Legacy.</description>
  55.   </book>
  56.   <book id="bk106">
  57.      <author>Randall, Cynthia</author>
  58.      <title>Lover Birds</title>
  59.      <genre>Romance</genre>
  60.      <price>4.95</price>
  61.      <publish_date>2000-09-02</publish_date>
  62.      <description>When Carla meets Paul at an ornithology
  63.      conference, tempers fly as feathers get ruffled.</description>
  64.   </book>
  65.   <book id="bk107">
  66.      <author>Thurman, Paula</author>
  67.      <title>Splish Splash</title>
  68.      <genre>Romance</genre>
  69.      <price>4.95</price>
  70.      <publish_date>2000-11-02</publish_date>
  71.      <description>A deep sea diver finds true love twenty
  72.      thousand leagues beneath the sea.</description>
  73.   </book>
  74.   <book id="bk108">
  75.      <author>Knorr, Stefan</author>
  76.      <title>Creepy Crawlies</title>
  77.      <genre>Horror</genre>
  78.      <price>4.95</price>
  79.      <publish_date>2000-12-06</publish_date>
  80.      <description>An anthology of horror stories about roaches,
  81.      centipedes, scorpions  and other insects.</description>
  82.   </book>
  83.   <book id="bk109">
  84.      <author>Kress, Peter</author>
  85.      <title>Paradox Lost</title>
  86.      <genre>Science Fiction</genre>
  87.      <price>6.95</price>
  88.      <publish_date>2000-11-02</publish_date>
  89.      <description>After an inadvertant trip through a Heisenberg
  90.      Uncertainty Device, James Salway discovers the problems
  91.      of being quantum.</description>
  92.   </book>
  93.   <book id="bk110">
  94.      <author>O'Brien, Tim</author>
  95.      <title>Microsoft .NET: The Programming Bible</title>
  96.      <genre>Computer</genre>
  97.      <price>36.95</price>
  98.      <publish_date>2000-12-09</publish_date>
  99.      <description>Microsoft's .NET initiative is explored in
  100.      detail in this deep programmer's reference.</description>
  101.   </book>
  102.   <book id="bk111">
  103.      <author>O'Brien, Tim</author>
  104.      <title>MSXML3: A Comprehensive Guide</title>
  105.      <genre>Computer</genre>
  106.      <price>36.95</price>
  107.      <publish_date>2000-12-01</publish_date>
  108.      <description>The Microsoft MSXML3 parser is covered in
  109.      detail, with attention to XML DOM interfaces, XSLT processing,
  110.      SAX and more.</description>
  111.   </book>
  112.   <book id="bk112">
  113.      <author>Galos, Mike</author>
  114.      <title>Visual Studio 7: A Comprehensive Guide</title>
  115.      <genre>Computer</genre>
  116.      <price>49.95</price>
  117.      <publish_date>2001-04-16</publish_date>
  118.      <description>Microsoft Visual Studio 7 is explored in depth,
  119.      looking at how Visual Basic, Visual C++, C#, and ASP+ are
  120.      integrated into a comprehensive development
  121.      environment.</description>
  122.   </book>
  123. </catalog>
  124. """
  125.  
  126. import xml2json
  127. import json
  128. import re
  129. import pymongo
  130.  
  def clean_extra_spaces(text):
      """Collapse every run of whitespace in ``text`` into a single space.

      ``text`` is first converted to the interpreter's text type
      (``unicode`` on Python 2, ``str`` on Python 3), so non-string values
      are accepted as well. Leading and trailing whitespace is collapsed to
      one space each, not stripped.

      Returns the converted text with newlines, tabs and repeated spaces
      replaced by single spaces.
      """
      try:
          to_text = unicode  # Python 2 text type
      except NameError:
          to_text = str  # Python 3: the ``unicode`` builtin no longer exists
      # Raw string so the regex escape is not read as a string escape.
      return re.sub(r"\s+", " ", to_text(text))
  136.  
  # Parse the XML before touching the database, so that a parsing failure
  # cannot leave the existing collection already dropped.
  # Whitespace runs (including newlines inside element text) are collapsed.
  xml = clean_extra_spaces(xml_data)
  catalog_json = xml2json.xml2json(xml)
  catalog = json.loads(catalog_json)

  # Collect the documents to store. The nesting walked here is
  # {root: {tag: [record, ...]}}.
  book_documents = []
  for root in catalog.values():
      for records in root.values():
          # NOTE(review): presumably xml2json emits a bare dict instead of a
          # one-element list when only one child element exists - confirm.
          # Iterating a dict would yield its keys, so wrap it as one document.
          if isinstance(records, dict):
              records = [records]
          book_documents.extend(records)

  # Connect to Mongo. MongoClient replaces pymongo.Connection (removed in
  # PyMongo 3.0); its writes are acknowledged by default, which is what
  # safe=True used to request.
  connection = pymongo.MongoClient("mongodb://localhost")
  try:
      db = connection.catalog
      books = db.books
      # Start from an empty collection on every run.
      books.drop()

      # insert_many rejects an empty list, so skip it when nothing was parsed.
      if book_documents:
          books.insert_many(book_documents)

      # Print results: the number of stored books, then each title.
      print(books.count_documents({}))
      for book in books.find({}, {"title": 1, "_id": 0}):
          print(book)
  finally:
      # Release the client's sockets even if an operation above fails.
      connection.close()
Advertisement
Add Comment
Please, Sign In to add comment