Guest User

Untitled

a guest
Apr 22nd, 2018
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.15 KB | None | 0 0
  1. #import "Foundation/Foundation.h"
  2. #include <libxml/HTMLparser.h>
  3. #include <libxml/HTMLtree.h>
  4. #include <libxml/xpath.h>
  5.  
  /**
   * Evaluates XPath expressions against HTML markup using libxml2.
   */
  @interface XPathEvaluator : NSObject

  /**
   * Parses `html` as an HTML document, evaluates the XPath expression `query`
   * against it, and describes every node in the resulting node set.
   *
   * @param query The XPath expression, e.g. `//span[@class='msg']`.
   * @param html  The HTML markup to parse.
   * @return An array with one NSDictionary per matched node, holding:
   *         - `name`:       the node name as an NSString,
   *         - `attributes`: an NSDictionary mapping attribute names to values,
   *         - `content`:    the serialized child nodes of the match with
   *                         surrounding whitespace and newlines trimmed.
   */
  + (NSArray *)arrayWithXPathQuery:(NSString *)query fromHTML:(NSString *)html;

  @end
  9.  
  @implementation XPathEvaluator

  /// Converts a NUL-terminated libxml2 string (UTF-8) into an NSString.
  /// Returns an empty string for NULL input or for bytes that are not valid
  /// UTF-8, so the result can always be stored in a collection.
  static NSString *XPEStringFromXMLChar(const xmlChar *cstr)
  {
      if (cstr == NULL) {
          return @"";
      }
      NSString *string = [NSString stringWithCString:(const char *)cstr
                                            encoding:NSUTF8StringEncoding];
      return (string != nil) ? string : @"";
  }

  /// Returns the bytes accumulated so far in an output buffer.
  /// libxml2 2.9.0 made the underlying buffer opaque, so the accessor has to
  /// be used there; older versions only offer direct struct access.
  static const xmlChar *XPEOutputBufferContent(xmlOutputBufferPtr outbuf)
  {
  #if LIBXML_VERSION >= 20900
      return xmlOutputBufferGetContent(outbuf);
  #else
      return (outbuf->buffer != NULL) ? outbuf->buffer->content : NULL;
  #endif
  }

  /// Serializes the children of `node` as HTML and returns the result with
  /// leading/trailing whitespace and newlines removed.
  static NSString *XPEInnerHTML(xmlDocPtr doc, xmlNodePtr node)
  {
      xmlOutputBufferPtr outbuf = xmlAllocOutputBuffer(NULL);
      if (outbuf == NULL) {
          NSLog(@"outbuf fail");
          return @"";
      }

      for (xmlNodePtr child = node->children; child != NULL; child = child->next) {
          htmlNodeDumpFormatOutput(outbuf, doc, child, "UTF-8", 0);
      }
      xmlOutputBufferFlush(outbuf);

      // Copy into an NSString before closing: closing frees the buffer.
      NSString *markup = XPEStringFromXMLChar(XPEOutputBufferContent(outbuf));
      xmlOutputBufferClose(outbuf);

      return [markup stringByTrimmingCharactersInSet:
                         [NSCharacterSet whitespaceAndNewlineCharacterSet]];
  }

  /// Collects the attributes of an element node into a name -> value dictionary.
  /// Non-element nodes yield an empty dictionary: only element nodes carry a
  /// `properties` list, and reading that field from other node structs is invalid.
  static NSDictionary *XPEAttributes(xmlDocPtr doc, xmlNodePtr node)
  {
      NSMutableDictionary *attributes = [NSMutableDictionary dictionary];
      if (node->type != XML_ELEMENT_NODE) {
          return attributes;
      }

      for (xmlAttrPtr attr = node->properties; attr != NULL; attr = attr->next) {
          // Valueless attributes (e.g. <input disabled>) have no children;
          // xmlNodeListGetString returns NULL for them instead of crashing.
          xmlChar *value = xmlNodeListGetString(doc, attr->children, 1);
          [attributes setObject:XPEStringFromXMLChar(value)
                         forKey:XPEStringFromXMLChar(attr->name)];
          if (value != NULL) {
              xmlFree(value);
          }
      }
      return attributes;
  }

  /// Parses `html`, evaluates the XPath expression `query` against it and
  /// returns one dictionary per matched node with the keys `name`,
  /// `attributes` and `content` (trimmed inner HTML).
  ///
  /// Returns an empty array when either argument is nil or cannot be encoded
  /// as UTF-8, when the HTML cannot be parsed, when the XPath expression is
  /// invalid, or when the expression does not evaluate to a node set.
  + (NSArray *)arrayWithXPathQuery:(NSString *)query fromHTML:(NSString *)html
  {
      // Owned (+1) by this method and handed to the caller's pool on return,
      // so it survives the release of the local pool below.
      NSMutableArray *results = [[NSMutableArray alloc] init];
      NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];

      const char *chtml = [html cStringUsingEncoding:NSUTF8StringEncoding];
      const char *cxpath = [query cStringUsingEncoding:NSUTF8StringEncoding];

      // Each stage runs only if the previous one succeeded, so no libxml2
      // call ever receives a NULL it was not checked against.
      htmlDocPtr doc = (chtml != NULL) ? htmlParseDoc((xmlChar *)chtml, "UTF-8") : NULL;
      if (doc == NULL) {
          NSLog(@"parse fail");
      }

      xmlXPathContextPtr xctx = (doc != NULL) ? xmlXPathNewContext(doc) : NULL;
      xmlXPathObjectPtr xobj = (xctx != NULL && cxpath != NULL)
                                   ? xmlXPathEval((xmlChar *)cxpath, xctx)
                                   : NULL;
      if (doc != NULL && xobj == NULL) {
          NSLog(@"xpath fail");
      }

      // nodesetval is NULL for non-node-set results (numbers, strings, ...);
      // xmlXPathNodeSetGetLength treats a NULL set as empty.
      xmlNodeSetPtr nodes = (xobj != NULL) ? xobj->nodesetval : NULL;
      int count = xmlXPathNodeSetGetLength(nodes);

      for (int n = 0; n < count; n++) {
          xmlNodePtr node = nodes->nodeTab[n];

          // Namespace results are xmlNs structs, not xmlNode; their fields
          // do not line up with the ones read below, so skip them.
          if (node == NULL || node->type == XML_NAMESPACE_DECL) {
              continue;
          }

          // NSLog(@"==== div.body:%d %s %d ====", n, node->name, node->type);

          // All three values are guaranteed non-nil, so the nil-terminated
          // list cannot be cut short by a missing value.
          NSDictionary *nodeinfo = [NSDictionary dictionaryWithObjectsAndKeys:
              XPEStringFromXMLChar(node->name), @"name",
              XPEAttributes(doc, node), @"attributes",
              XPEInnerHTML(doc, node), @"content",
              nil];

          [results addObject:nodeinfo];
      }

      if (xobj != NULL) {
          xmlXPathFreeObject(xobj);
      }
      if (xctx != NULL) {
          xmlXPathFreeContext(xctx);
      }
      if (doc != NULL) {
          xmlFreeDoc(doc);
      }

      [pool release];

      return [results autorelease];
  }

  @end
  85.  
  /// Command-line driver: evaluates an XPath query against an HTML string and
  /// logs the matched nodes followed by their count.
  ///
  /// Usage: program [xpath [html]]
  /// Both arguments are optional and fall back to a built-in sample.
  /// Returns 0 on success, 1 if an argument is not valid UTF-8.
  int main(int argc, char **argv)
  {
      NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
      int status = 0;

      const char *cxpath = (argc > 1) ? argv[1] : "//span[@class='msg']";
      // argv[2] exists as soon as argc > 2; the previous `argc > 3` test
      // ignored the HTML argument unless an extra third argument was given.
      const char *chtml = (argc > 2) ? argv[2] : "<span class=\"msg\">oppai</span><span>futomomo</span>";

      NSString *query = [NSString stringWithCString:cxpath encoding:NSUTF8StringEncoding];
      NSString *html = [NSString stringWithCString:chtml encoding:NSUTF8StringEncoding];

      if (query == nil || html == nil) {
          // stringWithCString:encoding: yields nil for bytes that are not valid UTF-8.
          NSLog(@"arguments must be valid UTF-8");
          status = 1;
      } else {
          NSArray *nodes = [XPathEvaluator arrayWithXPathQuery:query fromHTML:html];

          NSLog(@"%@", nodes);
          // -count returns NSUInteger; cast so the format matches on every platform.
          NSLog(@"%lu", (unsigned long)[nodes count]);
      }

      [pool release];

      return status;
  }
Add Comment
Please, Sign In to add comment