/*─────────────────────────────────────────────────────────────── readHTML(String input, Long timeoutMs, HashMap map, Object returnNode, Object setLocalVars) **Load and Extract HTML Data** Loads a webpage or parses a raw HTML/XML string invisibly, then extracts structured data using XPath or CSS selectors. Supports CSS @attribute syntax (e.g. "a[href]@href" to extract href values). Automatically detects: - Whether the input is a URL or an HTML/XML string. - Whether each selector is XPath or CSS. - Optional CSS attribute extraction via "@attr". - Optional full node HTML output or readable text. If setLocalVars is true, extracted values are stored into Tasker local variables. Otherwise, returns a JSON string {key:[values]}. Arguments: - input: URL or HTML/XML string to load or parse. - timeoutMs: Time in milliseconds to wait before extracting (default 3000). - map: Key-to-selector mapping for XPath or CSS. - returnNode: true to return full node HTML, false/null for text content. - setLocalVars: true to set Tasker local variables instead of returning JSON. Example: map = new HashMap(); map.put("links", "a[href]@href"); map.put("text", "//div[@data-container-id='main-col']"); result = readHTML("https://example.com", 2000, map, false, true); ──────────────────────────────────────────────────────────────*/ import android.webkit.WebView; import android.webkit.ValueCallback; import android.view.ViewGroup; import android.view.Gravity; import android.widget.FrameLayout; import android.view.Window; import android.view.WindowManager; import java.util.HashMap; import java.util.Iterator; import java.util.ArrayList; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; import io.reactivex.subjects.SingleSubject; import org.json.JSONArray; import org.json.JSONObject; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; readHTML(input, timeoutMs, map, returnNode, setLocalVars) { if (input == null || input.trim().length() == 0) return "Error: missing input"; if (timeoutMs == null) timeoutMs = 3000; isUrl = input.matches("^(https?|file)://.*|^[a-zA-Z0-9_-]+\\.[a-z]{2,}.*"); resultSignal = SingleSubject.create(); consumer = new Consumer() { accept(activityObj) { final android.app.Activity activity = (android.app.Activity) activityObj; /* Transparent, non-blocking window */ window = activity.getWindow(); window.setFlags( WindowManager.LayoutParams.FLAG_NOT_TOUCH_MODAL | WindowManager.LayoutParams.FLAG_NOT_FOCUSABLE | WindowManager.LayoutParams.FLAG_LAYOUT_NO_LIMITS, WindowManager.LayoutParams.FLAG_NOT_TOUCH_MODAL | WindowManager.LayoutParams.FLAG_NOT_FOCUSABLE | WindowManager.LayoutParams.FLAG_LAYOUT_NO_LIMITS ); window.setDimAmount(0); window.setBackgroundDrawable( new android.graphics.drawable.ColorDrawable(android.graphics.Color.TRANSPARENT) ); webView = new WebView(activity); webView.getSettings().setJavaScriptEnabled(true); webView.setAlpha(0); params = new FrameLayout.LayoutParams(1, 1); params.gravity = Gravity.TOP | Gravity.LEFT; activity.addContentView(webView, params); final Runnable extractor = new Runnable() { public void run() { activity.runOnUiThread(new Runnable() { public void run() { extractFull = (returnNode != null && ("" + returnNode).equalsIgnoreCase("true")); setLocals = (setLocalVars != null && ("" + setLocalVars).equalsIgnoreCase("true")); /*─────────────────────────────── * Build JavaScript dynamically *───────────────────────────────*/ js = """ (function(){ function getReadableText(node) { if (!node) return ""; var txt = node.innerText || node.textContent || ""; txt = txt.replace(/\\r\\n/g, "\\n").replace(/\\r/g, "\\n"); txt = txt.replace(/\\n{3,}/g, "\\n\\n"); return txt.trim(); } var out = {}; %s return JSON.stringify(out); })(); """; innerJs = ""; it = map.keySet().iterator(); while (it.hasNext()) { key = it.next(); selector = map.get(key); // Detect if it's XPath or CSS selector isXPath = selector.startsWith("/") || selector.startsWith("(") || selector.contains("::") || selector.startsWith("//"); // If CSS and has @attribute syntax (e.g. a[href]@href) hasAttr = !isXPath && selector.contains("@"); attr = ""; cssSelector = selector; if (hasAttr) { parts = selector.split("@", 2); cssSelector = parts[0]; attr = parts[1]; } if (isXPath) { // XPath extraction innerJs += String.format(""" try { var res = document.evaluate("%s", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); var arr = []; for (var i = 0; i < res.snapshotLength; i++) { var node = res.snapshotItem(i); if (%s) { arr.push(node.outerHTML || node.nodeValue || ''); } else { var val = (node.nodeType === 2 ? node.nodeValue : getReadableText(node)); arr.push(val); } } out['%s'] = arr; } catch(e) { out['%s'] = []; } """, selector.replace("\"", "\\\""), extractFull ? "true" : "false", key, key); } else if (hasAttr) { // CSS + @attribute extraction innerJs += String.format(""" try { var nodes = document.querySelectorAll("%s"); var arr = []; for (var i = 0; i < nodes.length; i++) { var node = nodes[i]; var val = node.getAttribute("%s") || ''; arr.push(val); } out['%s'] = arr; } catch(e) { out['%s'] = []; } """, cssSelector.replace("\"", "\\\""), attr, key, key); } else { // Standard CSS selector (node/text extraction) innerJs += String.format(""" try { var nodes = document.querySelectorAll("%s"); var arr = []; for (var i = 0; i < nodes.length; i++) { var node = nodes[i]; if (%s) { arr.push(node.outerHTML || ''); } else { var val = getReadableText(node); arr.push(val); } } out['%s'] = arr; } catch(e) { out['%s'] = []; } """, cssSelector.replace("\"", "\\\""), extractFull ? "true" : "false", key, key); } } js = String.format(js, innerJs); webView.evaluateJavascript(js, new ValueCallback() { public void onReceiveValue(Object resultRaw) { try { resultJson = new JSONArray("[" + resultRaw + "]").getString(0); obj = new JSONObject(resultJson); if (setLocals) { keys = obj.keys(); while (keys.hasNext()) { k = keys.next(); arr = obj.getJSONArray(k); for (i = 0; i < arr.length(); i++) { val = arr.optString(i, ""); if (i == 0) { tasker.setVariable(k, val); tasker.setVariable(k + "1", val); } else { tasker.setVariable(k + (i + 1), val); } } } resultSignal.onSuccess("OK (set local vars)"); } else { resultSignal.onSuccess(obj.toString(2)); } } catch (Exception e) { tasker.log("Parse error: " + e.getMessage()); resultSignal.onSuccess("{}"); } finally { activity.finish(); } } }); } }); } }; if (isUrl) { webView.loadUrl(input); new Thread(new Runnable() { public void run() { try { Thread.sleep(timeoutMs); extractor.run(); } catch (Exception e) { tasker.log("Timeout error: " + e.getMessage()); resultSignal.onSuccess("{}"); activity.finish(); } } }).start(); } else { webView.loadDataWithBaseURL(null, input, "text/html", "UTF-8", null); new Thread(new Runnable() { public void run() { try { Thread.sleep(500); extractor.run(); } catch (Exception e) { tasker.log("Parse error: " + e.getMessage()); resultSignal.onSuccess("{}"); activity.finish(); } } }).start(); } } }; tasker.doWithActivity(consumer); result = resultSignal.blockingGet(); return result; } /*─────────────────────────────────────────────────────────────── * Example usage *──────────────────────────────────────────────────────────────*/ example() { // Example 1: URL input url = "https://www.google.com/search?q=History of the founding of Apple"; map = new HashMap(); map.put("links", "//a/@href"); map.put("result_text", "div[data-container-id='main-col'] > div > div[data-sfc-cp='']"); map.put("result_subtext", "//div[@data-container-id='main-col']/div/ul"); result = readHTML(url, 2000, map, false, true); // Example 2: HTML input // html = "
LinkHello
"; // map = new HashMap(); // map.put("hrefs", "a[href]@href"); // map.put("text", "//p"); // result = readHTML(html, null, map, false, true); // return result; } return example();