Share Pastebin
Guest
Public paste!

dom96

By: a guest | Mar 21st, 2010 | Syntax: Python | Size: 11.08 KB | Hits: 37 | Expires: Never
Copy text to clipboard
  1. #
  2. #
  3. #            Nimrod's Runtime Library
  4. #        (c) Copyright 2010 Dominik Picheta, Andreas Rumpf
  5. #
  6. #    See the file "copying.txt", included in this
  7. #    distribution, for details about the copyright.
  8. #
  9.  
  10. ## This module implements a simple HTTP client that can be used to retrieve
  11. ## webpages/other data.
  12. ##
  13. ## Retrieving a website
  14. ## ====================
  15. ##
  16. ## This example uses HTTP GET to retrieve
  17. ## ``http://google.com``
  18. ##
  19. ## .. code-block:: nimrod
  20. ##   echo(getContent("http://google.com"))
  21. ##
  22. ## Using HTTP POST
  23. ## ===============
  24. ##
  25. ## This example demonstrates the usage of the W3 HTML Validator. It
  26. ## uses ``multipart/form-data`` as the ``Content-Type`` to send the HTML to
  27. ## the server.
  28. ##
  29. ## .. code-block:: nimrod
  30. ##   var headers: string = "Content-Type: multipart/form-data; boundary=xyz\c\L"
  31. ##   var body: string = "--xyz\c\L"
  32. ##   # soap 1.2 output
  33. ##   body.add("Content-Disposition: form-data; name=\"output\"\c\L")
  34. ##   body.add("\c\Lsoap12\c\L")
  35. ##    
  36. ##   # html
  37. ##   body.add("--xyz\c\L")
  38. ##   body.add("Content-Disposition: form-data; name=\"uploaded_file\";" &
  39. ##            " filename=\"test.html\"\c\L")
  40. ##   body.add("Content-Type: text/html\c\L")
  41. ##   body.add("\c\L<html><head></head><body><p>test</p></body></html>\c\L")
  42. ##   body.add("--xyz--")
  43. ##    
  44. ##   echo(postContent("http://validator.w3.org/check", headers, body))
  45.  
  46. import sockets, strutils, parseurl, parseutils, strtabs
  47.  
# ``TResponse`` is the parsed reply produced by ``parseResponse``:
#   version - "1.1" or "1.0", taken from the ``HTTP/x.y`` prefix
#   status  - the rest of the status line after the version and whitespace,
#             i.e. the code together with whatever text follows it on the
#             line (for example "200 OK")
#   headers - header name -> value, created with ``modeCaseInsensitive``
#   body    - the message body as returned by ``parseBody``
type
  TResponse* = tuple[
    version: string,
    status: string,
    headers: PStringTable,
    body: string]

  EInvalidProtocol* = object of EBase ## exception that is raised when server
                                      ## does not conform to the implemented
                                      ## protocol

  EHttpRequestErr* = object of EBase ## Thrown in the ``getContent`` proc
                                     ## and ``postContent`` proc,
                                     ## when the server returns an error
  62.  
  63. proc httpError(msg: string) =
  64.   var e: ref EInvalidProtocol
  65.   new(e)
  66.   e.msg = msg
  67.   raise e
  68.  
  69. proc fileError(msg: string) =
  70.   var e: ref EIO
  71.   new(e)
  72.   e.msg = msg
  73.   raise e
  74.  
  75. proc charAt(d: var string, i: var int, s: TSocket): char {.inline.} =
  76.   result = d[i]
  77.   while result == '\0':
  78.     d = s.recv()
  79.     i = 0
  80.     result = d[i]
  81.  
  82. proc parseChunks(d: var string, start: int, s: TSocket): string =
  83.   # get chunks:
  84.   var i = start
  85.   result = ""
  86.   while true:
  87.     var chunkSize = 0
  88.     var digitFound = false
  89.     while true:
  90.       case d[i]
  91.       of '0'..'9':
  92.         digitFound = true
  93.         chunkSize = chunkSize shl 4 or (ord(d[i]) - ord('0'))
  94.       of 'a'..'f':
  95.         digitFound = true
  96.         chunkSize = chunkSize shl 4 or (ord(d[i]) - ord('a') + 10)
  97.       of 'A'..'F':
  98.         digitFound = true
  99.         chunkSize = chunkSize shl 4 or (ord(d[i]) - ord('A') + 10)
  100.       of '\0':
  101.         d = s.recv()
  102.         i = -1
  103.       else: break
  104.       inc(i)
  105.     if not digitFound: httpError("Chunksize expected")
  106.     if chunkSize <= 0: break
  107.     while charAt(d, i, s) notin {'\C', '\L', '\0'}: inc(i)
  108.     if charAt(d, i, s) == '\C': inc(i)
  109.     if charAt(d, i, s) == '\L': inc(i)
  110.     else: httpError("CR-LF after chunksize expected")
  111.    
  112.     var x = copy(d, i, i+chunkSize-1)
  113.     var size = x.len
  114.     result.add(x)
  115.     inc(i, size)
  116.     if size < chunkSize:
  117.       # read in the rest:
  118.       var missing = chunkSize - size
  119.       var L = result.len
  120.       setLen(result, L + missing)    
  121.       while missing > 0:
  122.         var bytesRead = s.recv(addr(result[L]), missing)
  123.         inc(L, bytesRead)
  124.         dec(missing, bytesRead)
  125.       # next chunk:
  126.       d = s.recv()
  127.       i = 0
  128.     # skip trailing CR-LF:
  129.     while charAt(d, i, s) in {'\C', '\L'}: inc(i)
  130.  
  131. proc parseBody(d: var string, start: int, s: TSocket,
  132.                headers: PStringTable): string =
  133.   if headers["Transfer-Encoding"] == "chunked":
  134.     result = parseChunks(d, start, s)
  135.   else:
  136.     result = copy(d, start)
  137.     # -REGION- Content-Length
  138.     # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.3
  139.     var contentLengthHeader = headers["Content-Length"]
  140.     if contentLengthHeader != "":
  141.       var length = contentLengthHeader.parseint()
  142.       while result.len() < length: result.add(s.recv())
  143.     else:
  144.       # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.4 TODO
  145.      
  146.       # -REGION- Connection: Close
  147.       # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.5
  148.       if headers["Connection"] == "close":
  149.         while True:
  150.           var moreData = recv(s)
  151.           if moreData.len == 0: break
  152.           result.add(moreData)
  153.  
  154. proc parseResponse(s: TSocket): TResponse =
  155.   var d = s.recv()
  156.   var i = 0
  157.  
  158.   # Parse the version
  159.   # Parses the first line of the headers
  160.   # ``HTTP/1.1`` 200 OK
  161.   var L = skipIgnoreCase(d, "HTTP/1.1", i)
  162.   if L > 0:
  163.     result.version = "1.1"
  164.     inc(i, L)
  165.   else:
  166.     L = skipIgnoreCase(d, "HTTP/1.0", i)
  167.     if L > 0:
  168.       result.version = "1.0"
  169.       inc(i, L)
  170.     else:
  171.       httpError("invalid HTTP header")
  172.   L = skipWhiteSpace(d, i)
  173.   if L <= 0: httpError("invalid HTTP header")
  174.   inc(i, L)
  175.  
  176.   result.status = ""
  177.   while d[i] notin {'\C', '\L', '\0'}:
  178.     result.status.add(d[i])
  179.     inc(i)
  180.   if d[i] == '\C': inc(i)
  181.   if d[i] == '\L': inc(i)
  182.   else: httpError("invalid HTTP header, CR-LF expected")
  183.  
  184.   # Parse the headers
  185.   # Everything after the first line leading up to the body
  186.   # htype: hvalue
  187.   result.headers = newStringTable(modeCaseInsensitive)
  188.   while true:
  189.     var key = ""
  190.     while d[i] != ':':
  191.       if d[i] == '\0': httpError("invalid HTTP header, ':' expected")
  192.       key.add(d[i])
  193.       inc(i)
  194.     inc(i) # skip ':'
  195.     if d[i] == ' ': inc(i) # skip if the character is a space
  196.     var val = ""
  197.     while d[i] notin {'\C', '\L', '\0'}:
  198.       val.add(d[i])
  199.       inc(i)
  200.    
  201.     result.headers[key] = val
  202.    
  203.     if d[i] == '\C': inc(i)
  204.     if d[i] == '\L': inc(i)
  205.     else: httpError("invalid HTTP header, CR-LF expected")
  206.    
  207.     if d[i] == '\C': inc(i)
  208.     if d[i] == '\L':
  209.       inc(i)
  210.       break
  211.    
  212.   result.body = parseBody(d, i, s, result.headers)
  213.  
# NOTE: ``request`` derives the verb it sends from the member name by
# stripping the leading ``http`` (``$httpGET`` -> ``GET``), so the part after
# the prefix must be spelled exactly like the HTTP method.
type
  THttpMethod* = enum ## the requested HttpMethod
    httpHEAD,         ## Asks for the response identical to the one that would
                      ## correspond to a GET request, but without the response
                      ## body.
    httpGET,          ## Retrieves the specified resource.
    httpPOST,         ## Submits data to be processed to the identified
                      ## resource. The data is included in the body of the
                      ## request.
    httpPUT,          ## Uploads a representation of the specified resource.
    httpDELETE,       ## Deletes the specified resource.
    httpTRACE,        ## Echoes back the received request, so that a client
                      ## can see what intermediate servers are adding or
                      ## changing in the request.
    httpOPTIONS,      ## Returns the HTTP methods that the server supports
                      ## for specified address.
    httpCONNECT       ## Converts the request connection to a transparent
                      ## TCP/IP tunnel, usually used for proxies.
  232.  
  233. proc request*(url: string, httpMethod = httpGET, extraHeaders = "",
  234.               body = ""): TResponse =
  235.   ## | Requests ``url`` with the specified ``httpMethod``.
  236.   ## | Extra headers can be specified and must be seperated by ``\c\L``
  237.   var r = parseUrl(url)
  238.  
  239.   var headers = copy($httpMethod, len("http"))
  240.   if r.path != "" and r.path != "/":
  241.     headers.add(" " & r.path & r.query)
  242.   headers.add(" / HTTP/1.1\c\L")
  243.  
  244.   add(headers, "Host: " & r.hostname & "\c\L")
  245.   add(headers, extraHeaders)
  246.   add(headers, "\c\L")
  247.  
  248.   var s = socket()
  249.   s.connect(r.hostname, TPort(80))
  250.   s.send(headers)
  251.   if body != "":
  252.     s.send(body)
  253.  
  254.   result = parseResponse(s)
  255.   s.close()
  256.  
  257. proc redirection(status: string): bool =
  258.   const redirectionNRs = ["301", "302", "303", "307"]
  259.   for i in items(redirectionNRs):
  260.     if status.startsWith(i):
  261.       return True
  262.  
  263. proc get*(url: string, maxRedirects = 5): TResponse =
  264.   ## | GET's the ``url`` and returns a ``TResponse`` object
  265.   ## | This proc also handles redirection
  266.   result = request(url)
  267.   for i in 1..maxRedirects:
  268.     if result.status.redirection():
  269.       var locationHeader = result.headers["Location"]
  270.       if locationHeader == "": httpError("location header expected")
  271.       result = request(locationHeader)
  272.      
  273. proc getContent*(url: string): string =
  274.   ## | GET's the body and returns it as a string.
  275.   ## | Raises exceptions for the status codes ``4xx`` and ``5xx``
  276.   var r = get(url)
  277.   if r.status[0] in {'4','5'}:
  278.     raise newException(EHTTPRequestErr, r.status)
  279.   else:
  280.     return r.body
  281.  
  282. proc post*(url: string, extraHeaders = "", body = "",
  283.            maxRedirects = 5): TResponse =
  284.   ## | POST's ``body`` to the ``url`` and returns a ``TResponse`` object.
  285.   ## | This proc adds the necessary Content-Length header.
  286.   ## | This proc also handles redirection.
  287.   var xh = extraHeaders & "Content-Length: " & $len(body) & "\c\L"
  288.   result = request(url, httpPOST, xh, body)
  289.   for i in 1..maxRedirects:
  290.     if result.status.redirection():
  291.       var locationHeader = result.headers["Location"]
  292.       if locationHeader == "": httpError("location header expected")
  293.       var meth = if result.status != "307": httpGet else: httpPost
  294.       result = request(locationHeader, meth, xh, body)
  295.  
  296. proc postContent*(url: string, extraHeaders = "", body = ""): string =
  297.   ## | POST's ``body`` to ``url`` and returns the response's body as a string
  298.   ## | Raises exceptions for the status codes ``4xx`` and ``5xx``
  299.   var r = post(url, extraHeaders, body)
  300.   if r.status[0] in {'4','5'}:
  301.     raise newException(EHTTPRequestErr, r.status)
  302.   else:
  303.     return r.body
  304.  
  305. proc downloadFile*(url: string, outputFilename: string) =
  306.   ## Downloads ``url`` and saves it to ``outputFilename``
  307.   var f: TFile
  308.   if open(f, outputFilename, fmWrite):
  309.     f.write(getContent(url))
  310.     f.close()
  311.   else:
  312.     fileError("Unable to open file")
  313.  
  314.  
  315. when isMainModule:
  316.   #downloadFile("http://force7.de/nimrod/index.html", "nimrodindex.html")
  317.   #downloadFile("http://www.httpwatch.com/", "ChunkTest.html")
  318.   #downloadFile("http://validator.w3.org/check?uri=http%3A%2F%2Fgoogle.com",
  319.   # "validator.html")
  320.  
  321.   #var r = get("http://validator.w3.org/check?uri=http%3A%2F%2Fgoogle.com&
  322.   #  charset=%28detect+automatically%29&doctype=Inline&group=0")
  323.  
  324.   var headers: string = "Content-Type: multipart/form-data; boundary=xyz\c\L"
  325.   var body: string = "--xyz\c\L"
  326.   # soap 1.2 output
  327.   body.add("Content-Disposition: form-data; name=\"output\"\c\L")
  328.   body.add("\c\Lsoap12\c\L")
  329.  
  330.   # html
  331.   body.add("--xyz\c\L")
  332.   body.add("Content-Disposition: form-data; name=\"uploaded_file\";" &
  333.            " filename=\"test.html\"\c\L")
  334.   body.add("Content-Type: text/html\c\L")
  335.   body.add("\c\L<html><head></head><body><p>test</p></body></html>\c\L")
  336.   body.add("--xyz--")
  337.  
  338.   echo(postContent("http://validator.w3.org/check", headers, body))