# ~/ComfyUI/custom_nodes/llm_stream_node.py
import requests, json, sys


class LLMStreamNode:
    CATEGORY = "LLM"
    OUTPUT_NODE = True
    RETURN_TYPES = ("STRING",)
    FUNCTION = "generate"
    DESCRIPTION = (
        "Connect to any OpenAI-compatible LLM endpoint.\n\n"
        " • Set 'Max Tokens' to 0 for no limit.\n"
        " • 'Deterministic' forces greedy sampling.\n"
        " • Disable 'Enable Thinking' to inject '/no_think'.\n"
        " • Enable 'Stream console' to print the token stream to the ComfyUI console."
    )
    # ---------------- strip reasoning helper -------------
    @staticmethod
    def _strip(txt, start_tag, end_tag):
        """Remove every start_tag…end_tag block from txt."""
        out, i = "", 0
        while (s := txt.find(start_tag, i)) != -1:
            out += txt[i:s]              # keep text before the tag
            e = txt.find(end_tag, s)
            if e == -1:                  # unterminated block: drop the remainder
                i = len(txt)
                break
            i = e + len(end_tag)         # resume after the closing tag
        out += txt[i:]
        return out.lstrip()
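    # For instance (a hypothetical reply, assuming the default tags):
    #   _strip("<think>plan the haiku…</think>\nHere it is.", "<think>", "</think>")
    # returns "Here it is." -- the reasoning block is removed and the leading
    # whitespace of the visible answer is trimmed by lstrip().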
    # ---------------- UI spec ----------------------------
    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {
            "endpoint": ("STRING", {
                "default": "http://127.0.0.1:5000",
                "tooltip": "Base URL of OpenAI-compatible LLM server (no /v1/…)"
            }),
            "prompt": ("STRING", {"multiline": True}),
            "max_tokens": ("INT", {
                "default": 0, "min": 0, "max": 32768, "step": 512,
                "tooltip": "Maximum tokens to generate; set to 0 to disable any limit"
            }),
            "seed": ("INT", {"default": 1, "min": -1, "max": 2**31 - 1}),
            "deterministic": ("BOOLEAN", {
                "default": False,
                "tooltip": "Force temperature=0, top_p=1, top_k=0, repeat_penalty=1.0"
            }),
            "enable_thinking": ("BOOLEAN", {
                "default": True,
                "tooltip": "If False, injects `/no_think` as a system message"
            }),
            "stream_console": ("BOOLEAN", {
                "default": True,
                "tooltip": "Print full token stream (thinking + answer) to console"
            }),
            "think_start_tag": ("STRING", {"default": "<think>"}),
            "think_end_tag": ("STRING", {"default": "</think>"})
        }}
    # ---------------- main -------------------------------
    def generate(self, endpoint, prompt, max_tokens, seed,
                 deterministic, enable_thinking, stream_console,
                 think_start_tag, think_end_tag):
        url = f"{endpoint.rstrip('/')}/v1/chat/completions"
        sys_prompt = "/think" if enable_thinking else "/no_think"
        body = {
            "messages": [
                {"role": "system", "content": sys_prompt},
                {"role": "user", "content": prompt}
            ],
            "stream": stream_console
        }
        if max_tokens > 0:
            body["max_tokens"] = max_tokens
        # only set sampler params in deterministic mode
        if deterministic:
            body.update(dict(
                temperature=0,
                top_p=1,
                top_k=0,
                repeat_penalty=1.0
            ))
        if seed >= 0:
            body["seed"] = seed
        # -------- streaming branch --------
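        # OpenAI-compatible servers stream Server-Sent Events, one JSON chunk
        # per line; a hypothetical chunk (exact shape varies by backend) looks like:
        #   data: {"choices":[{"delta":{"content":"Hel"},"index":0}]}
        # and the stream normally ends with:
        #   data: [DONE]
        # The loop below extracts each delta's "content" and filters out any
        # text between the configured thinking tags.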
        if stream_console:
            answer, thinking = "", False
            try:
                with requests.post(url, json=body, stream=True, timeout=None) as r:
                    r.encoding = "utf-8"
                    r.raise_for_status()
                    for raw in r.iter_lines(decode_unicode=True):
                        if not raw or not raw.startswith("data:"):
                            continue
                        payload = raw[5:].strip()
                        if payload == "[DONE]":
                            break
                        delta = json.loads(payload)["choices"][0].get("delta") or {}
                        text = delta.get("content") or ""
                        if text:
                            sys.stdout.write(text)
                            sys.stdout.flush()
                        # assume the thinking tags arrive in their own chunks
                        if think_start_tag in text:
                            thinking = True
                            continue
                        if think_end_tag in text:
                            thinking = False
                            answer += text.split(think_end_tag, 1)[1]
                            continue
                        if not thinking:
                            answer += text
                    sys.stdout.write("\n")
            except Exception as e:
                answer = f"Error: {e}"
            return (answer.lstrip(),)
        # -------- blocking branch ---------
        try:
            r = requests.post(url, json=body, timeout=180)
            r.encoding = "utf-8"
            r.raise_for_status()
            raw = r.json()["choices"][0]["message"]["content"]
            answer = self._strip(raw, think_start_tag, think_end_tag)
        except Exception as e:
            answer = f"Error: {e}"
        return (answer.lstrip(),)


# register the node with ComfyUI
NODE_CLASS_MAPPINGS = {"LLMStream": LLMStreamNode}
NODE_DISPLAY_NAME_MAPPINGS = {"LLMStream": "Streaming LLM Text Generator"}
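

# A minimal standalone smoke test (not part of the ComfyUI node API) -- it
# assumes an OpenAI-compatible server is already listening on the default
# endpoint below; adjust the URL and prompt to your own setup.
if __name__ == "__main__":
    node = LLMStreamNode()
    (reply,) = node.generate(
        endpoint="http://127.0.0.1:5000",
        prompt="Summarise what a ComfyUI custom node is in one sentence.",
        max_tokens=128,
        seed=42,
        deterministic=True,
        enable_thinking=False,
        stream_console=False,
        think_start_tag="<think>",
        think_end_tag="</think>",
    )
    print(reply)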