Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#import "Foundation/Foundation.h"
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#include <libxml/tree.h>
#include <libxml/xmlIO.h>
#include <libxml/xmlmemory.h>
#include <libxml/xpath.h>
/**
 * Evaluates XPath expressions against HTML markup using libxml2.
 */
@interface XPathEvaluator : NSObject

/**
 * Parses `html` with libxml2's HTML parser and evaluates `query` against
 * the resulting document.
 *
 * @param query XPath expression, e.g. //span[@class='msg'].
 * @param html  HTML markup; it is handed to the parser as UTF-8.
 * @return An autoreleased array holding one NSDictionary per matched node,
 *         in the order of libxml2's node set. Each dictionary contains:
 *           @"name"       - the node name (NSString)
 *           @"attributes" - NSDictionary of attribute name -> value
 *                           (empty for non-element nodes)
 *           @"content"    - the node's children serialized as HTML, with
 *                           leading/trailing whitespace trimmed (NSString)
 *         The array is empty when the document cannot be parsed, the query
 *         cannot be evaluated, or the result is not a node set.
 */
+ (NSArray *)arrayWithXPathQuery:(NSString *)query fromHTML:(NSString *)html;

@end

/*
 * Converts a NUL-terminated UTF-8 libxml2 string into an NSString.
 * NULL or undecodable input yields @"" so the result can always be stored
 * in a collection (nil would raise, or silently truncate a nil-terminated
 * argument list).
 */
static NSString *XPathEvaluatorStringFromXMLChar(const xmlChar *text)
{
    if (text == NULL) {
        return @"";
    }
    NSString *string = [NSString stringWithCString:(const char *)text
                                          encoding:NSUTF8StringEncoding];
    return (string != nil) ? string : @"";
}

/*
 * Returns the attributes of `node` as a name -> value dictionary.
 * Only element nodes carry a `properties` list; for every other node type
 * that field must not be read, so an empty dictionary is returned.
 */
static NSDictionary *XPathEvaluatorAttributesOfNode(xmlDocPtr doc, xmlNodePtr node)
{
    NSMutableDictionary *attributes = [NSMutableDictionary dictionary];
    if (node->type != XML_ELEMENT_NODE) {
        return attributes;
    }

    for (xmlAttrPtr attr = node->properties; attr != NULL; attr = attr->next) {
        // attr->children is NULL for valueless attributes (<input disabled>);
        // xmlNodeListGetString copes with that and returns NULL.
        xmlChar *value = xmlNodeListGetString(doc, attr->children, 1);
        [attributes setObject:XPathEvaluatorStringFromXMLChar(value)
                       forKey:XPathEvaluatorStringFromXMLChar(attr->name)];
        if (value != NULL) {
            xmlFree(value); // the string is allocated by libxml2 and owned by us
        }
    }
    return attributes;
}

/*
 * Serializes the children of `node` as HTML ("inner HTML") and returns the
 * text with surrounding whitespace removed. Returns @"" when the output
 * buffer cannot be allocated.
 */
static NSString *XPathEvaluatorInnerHTMLOfNode(xmlDocPtr doc, xmlNodePtr node)
{
    xmlOutputBufferPtr outbuf = xmlAllocOutputBuffer(NULL);
    if (outbuf == NULL) {
        NSLog(@"outbuf fail");
        return @"";
    }

    for (xmlNodePtr child = node->children; child != NULL; child = child->next) {
        htmlNodeDumpFormatOutput(outbuf, doc, child, "UTF-8", 0);
    }
    xmlOutputBufferFlush(outbuf);

    // Use the accessor rather than reaching into outbuf->buffer, whose type
    // is opaque in libxml2 2.9 and later. Copy before closing the buffer.
    NSString *markup = XPathEvaluatorStringFromXMLChar(xmlOutputBufferGetContent(outbuf));
    xmlOutputBufferClose(outbuf);

    NSCharacterSet *blanks = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    return [markup stringByTrimmingCharactersInSet:blanks];
}

@implementation XPathEvaluator

+ (NSArray *)arrayWithXPathQuery:(NSString *)query fromHTML:(NSString *)html
{
    // Owned (+1) so it survives draining the local pool below; it is handed
    // to the caller's pool on return.
    NSMutableArray *results = [[NSMutableArray alloc] init];
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];

    // Both are NULL when the corresponding argument is nil.
    const char *chtml = [html UTF8String];
    const char *cxpath = [query UTF8String];

    htmlDocPtr doc = (chtml != NULL) ? htmlParseDoc((xmlChar *)chtml, "UTF-8") : NULL;
    if (doc == NULL) {
        NSLog(@"parse fail");
    }

    xmlXPathContextPtr xctx = (doc != NULL) ? xmlXPathNewContext(doc) : NULL;
    xmlXPathObjectPtr xobj = NULL;
    if (xctx != NULL && cxpath != NULL) {
        xobj = xmlXPathEval((xmlChar *)cxpath, xctx);
    }
    if (doc != NULL && xobj == NULL) {
        NSLog(@"xpath fail");
    }

    // Only touch the result object once it is known to exist. The length
    // macro treats a NULL node set (e.g. a numeric result) as empty.
    xmlNodeSetPtr nodes = (xobj != NULL) ? xobj->nodesetval : NULL;
    int nodeCount = xmlXPathNodeSetGetLength(nodes);

    for (int n = 0; n < nodeCount; n++) {
        xmlNodePtr node = nodes->nodeTab[n];
        // Namespace nodes are xmlNs structures, not xmlNode; their layout
        // differs, so they cannot be inspected like tree nodes.
        if (node == NULL || node->type == XML_NAMESPACE_DECL) {
            continue;
        }

        NSDictionary *nodeinfo = [NSDictionary dictionaryWithObjectsAndKeys:
            XPathEvaluatorStringFromXMLChar(node->name), @"name",
            XPathEvaluatorAttributesOfNode(doc, node), @"attributes",
            XPathEvaluatorInnerHTMLOfNode(doc, node), @"content",
            nil];
        [results addObject:nodeinfo];
    }

    if (xobj != NULL) {
        xmlXPathFreeObject(xobj);
    }
    if (xctx != NULL) {
        xmlXPathFreeContext(xctx);
    }
    if (doc != NULL) {
        xmlFreeDoc(doc);
    }

    [pool release];
    return [results autorelease];
}

@end
/**
 * Command-line driver: evaluates an XPath query against an HTML snippet and
 * logs the matched nodes followed by their count.
 *
 * Usage: prog [xpath [html]]
 *   argv[1]  XPath expression (default: //span[@class='msg'])
 *   argv[2]  HTML markup      (default: a two-span sample document)
 *
 * Returns 0 on success, 1 when an argument is not valid UTF-8.
 */
int main(int argc, char **argv)
{
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];

    const char *cxpath = (argc > 1) ? argv[1] : "//span[@class='msg']";
    // argv[2] exists as soon as argc > 2.
    const char *chtml = (argc > 2) ? argv[2] : "<span class=\"msg\">oppai</span><span>futomomo</span>";

    NSString *query = [NSString stringWithCString:cxpath encoding:NSUTF8StringEncoding];
    NSString *html = [NSString stringWithCString:chtml encoding:NSUTF8StringEncoding];
    if (query == nil || html == nil) {
        // stringWithCString:encoding: yields nil for bytes that are not UTF-8.
        NSLog(@"arguments must be valid UTF-8");
        [pool release];
        return 1;
    }

    NSArray *nodes = [XPathEvaluator arrayWithXPathQuery:query fromHTML:html];
    NSLog(@"%@", nodes);
    // -count returns NSUInteger; cast so the format specifier matches on
    // both 32- and 64-bit targets.
    NSLog(@"%lu", (unsigned long)[nodes count]);

    [pool release];
    return 0;
}
Add Comment
Please, Sign In to add comment