Advertisement
subbass

txtreader search/browse/read

May 22nd, 2024 (edited)
477
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 22.43 KB | Source Code | 0 0
  1. #!/usr/bin/env python3
  2.  
  3. import os
  4. import re
  5. import curses
  6. import subprocess
  7. import json
  8. import argparse
  9. import signal
  10. import shutil
  11. from datetime import datetime, timedelta
  12.  
  13. def get_config_file():
  14.     """Get the path to the configuration file."""
  15.     script_name = os.path.splitext(os.path.basename(__file__))[0]
  16.     config_file = f"{script_name}_conf.txt"
  17.     return os.path.join(os.path.dirname(__file__), config_file)
  18.  
  19. def load_config():
  20.     """Load the entire configuration from a plain text file."""
  21.     config_file = get_config_file()
  22.     default_config = {
  23.         "SEARCH_DIR": os.path.expanduser("~/Documents/text_files/"),
  24.         "TEMP_COPY_DIR": os.path.expanduser("~/Documents/text_files/temp_copy"),
  25.         "favorites": [],
  26.         "editor": "nano"
  27.     }
  28.     config = default_config.copy()
  29.  
  30.     if os.path.exists(config_file):
  31.         with open(config_file, 'r') as f:
  32.             lines = f.readlines()
  33.             current_favorite = {}
  34.             for line in lines:
  35.                 line = line.strip()
  36.                 if line.startswith("favorite_") and "=" in line:
  37.                     key, value = line.split('=', 1)
  38.                     if key.endswith("_filename"):
  39.                         if current_favorite:
  40.                             config["favorites"].append(current_favorite)
  41.                         current_favorite = {"filename": value}
  42.                     elif key.endswith("_position"):
  43.                         current_favorite["position"] = int(value)
  44.                     elif key.endswith("_search_text"):
  45.                         current_favorite["search_text"] = value
  46.                 else:
  47.                     key, value = line.split('=', 1)
  48.                     config[key] = value
  49.             if current_favorite:
  50.                 config["favorites"].append(current_favorite)
  51.     return config
  52.  
  53. def save_default_config():
  54.     """Save the default configuration to a plain text file if it doesn't exist."""
  55.     config_file = get_config_file()
  56.     if not os.path.exists(config_file):
  57.         default_config = {
  58.             "SEARCH_DIR": os.path.expanduser("~/Documents/text_files/"),
  59.             "TEMP_COPY_DIR": os.path.expanduser("~/Documents/text_files/temp_copy"),
  60.             "editor": "nano"
  61.         }
  62.         save_config(default_config)
  63.  
  64. def save_config(config):
  65.     """Save the entire configuration to a plain text file."""
  66.     config_file = get_config_file()
  67.     with open(config_file, 'w') as f:
  68.         f.write(f"SEARCH_DIR={config['SEARCH_DIR']}\n")
  69.         f.write(f"TEMP_COPY_DIR={config['TEMP_COPY_DIR']}\n")
  70.         f.write(f"editor={config.get('editor', 'nano')}\n")
  71.         if 'last_file' in config:
  72.             f.write(f"last_file={config['last_file']}\n")
  73.         if 'position' in config:
  74.             f.write(f"position={config['position']}\n")
  75.         if 'search_text' in config:
  76.             f.write(f"search_text={config['search_text']}\n")
  77.         for i, fav in enumerate(config.get("favorites", []), 1):
  78.             f.write(f"favorite_{i}_filename={fav['filename']}\n")
  79.             f.write(f"favorite_{i}_position={fav['position']}\n")
  80.             f.write(f"favorite_{i}_search_text={fav['search_text']}\n")
  81.  
  82. def save_position(filename, position, search_text=""):
  83.     """Save the last position of the file being read."""
  84.     data = load_config()
  85.     data["last_file"] = filename
  86.     data["position"] = position
  87.     data["search_text"] = search_text
  88.     save_config(data)
  89.  
  90. def load_position():
  91.     """Load the last position of the file read."""
  92.     data = load_config()
  93.     return {
  94.         "last_file": data.get("last_file"),
  95.         "position": int(data.get("position", 0)),
  96.         "search_text": data.get("search_text", "")
  97.     }
  98.  
  99. def add_to_favorites(filename, position, search_text):
  100.     """Add the current file to the favorites list."""
  101.     config = load_config()
  102.     favorites = config.get("favorites", [])
  103.     favorites.append({
  104.         "filename": filename,
  105.         "position": position,
  106.         "search_text": search_text
  107.     })
  108.     config["favorites"] = favorites
  109.     save_config(config)
  110.  
  111. def list_files(directory):
  112.     """List all text files in the given directory and its subdirectories."""
  113.     file_list = []
  114.     for root, _, files in os.walk(directory):
  115.         for file in files:
  116.             if file.endswith(".txt"):
  117.                 file_list.append(os.path.join(root, file))
  118.     return file_list
  119.  
  120. def read_file(file_path):
  121.     """Read a file with different encodings to avoid UnicodeDecodeError."""
  122.     encodings = ['utf-8', 'latin-1', 'iso-8859-1']
  123.     for encoding in encodings:
  124.         try:
  125.             with open(file_path, 'r', encoding=encoding) as file:
  126.                 return file.readlines()
  127.         except UnicodeDecodeError:
  128.             continue
  129.     return []
  130.  
  131. def save_search_results(search_term, results):
  132.     """Save search results to a cache file."""
  133.     config = load_config()
  134.     cache_dir = os.path.join(config["TEMP_COPY_DIR"], "cache")
  135.     if not os.path.exists(cache_dir):
  136.         os.makedirs(cache_dir)
  137.     cache_file = os.path.join(cache_dir, f"{search_term}.json")
  138.     data = {
  139.         "timestamp": datetime.now().isoformat(),
  140.         "results": results
  141.     }
  142.     with open(cache_file, 'w') as f:
  143.         json.dump(data, f)
  144.  
  145. def load_search_results(search_term):
  146.     """Load search results from a cache file if they are recent."""
  147.     config = load_config()
  148.     cache_dir = os.path.join(config["TEMP_COPY_DIR"], "cache")
  149.     cache_file = os.path.join(cache_dir, f"{search_term}.json")
  150.     if os.path.exists(cache_file):
  151.         with open(cache_file, 'r') as f:
  152.             data = json.load(f)
  153.             timestamp = datetime.fromisoformat(data["timestamp"])
  154.             if datetime.now() - timestamp < timedelta(days=1):
  155.                 return data["results"]
  156.     return None
  157.  
  158. def search_files(file_list, search_pattern):
  159.     """Search for a term within a list of files and count matches, using cached results if available."""
  160.     cached_results = load_search_results(search_pattern)
  161.     if cached_results is not None:
  162.         return cached_results
  163.    
  164.     results = []
  165.     pattern = re.compile(search_pattern, re.IGNORECASE)
  166.     for file_path in file_list:
  167.         lines = read_file(file_path)
  168.         match_count = sum(1 for line in lines if pattern.search(line))
  169.         if match_count > 0:
  170.             results.append((file_path, match_count))
  171.    
  172.     save_search_results(search_pattern, results)
  173.     return results
  174.  
  175. def copy_file_to_temp_copy(file_path):
  176.     """Copy the given file to the temp_copy directory."""
  177.     config = load_config()
  178.     shutil.copy(file_path, config["TEMP_COPY_DIR"])
  179.  
  180. def open_in_editor(file_path, line_number):
  181.     """Open the file in the configured editor at the specified line number."""
  182.     config = load_config()
  183.     editor = config.get("editor", "nano")
  184.  
  185.     # End the curses session before opening the editor
  186.     curses.endwin()
  187.  
  188.     if editor == "nano":
  189.         subprocess.call([editor, f"+{line_number}", file_path])
  190.     elif editor == "xdg-open":
  191.         subprocess.call([editor, file_path])
  192.     else:
  193.         raise ValueError("Unsupported editor configured.")
  194.  
  195.     # Reinitialize the curses session after closing the editor
  196.     stdscr = curses.initscr()
  197.     curses.start_color()
  198.     curses.use_default_colors()
  199.     curses.cbreak()
  200.     stdscr.keypad(True)
  201.     curses.noecho()
  202.     return stdscr
  203.  
  204. def display_file(stdscr, file_path, start_pos=0, search_text=""):
  205.     """Display the file using curses and pandoc."""
  206.     def find_matches(lines, pattern):
  207.         """Find all matches of the pattern in the lines."""
  208.         matches = []
  209.         regex = re.compile(pattern, re.IGNORECASE)
  210.         for i, line in enumerate(lines):
  211.             if regex.search(line):
  212.                 matches.append(i)
  213.         return matches
  214.  
  215.     def display_status(stdscr, file_path, file_size_kb, percentage_read, search_mode, search_text="", match_idx=0, total_matches=0):
  216.         """Display the status bar."""
  217.         max_y, max_x = stdscr.getmaxyx()
  218.         status = f"File: {os.path.basename(file_path)} | Size: {file_size_kb:.2f} KB | {percentage_read:.2f}% read"
  219.         stdscr.addstr(0, 0, status[:max_x-1], curses.A_REVERSE)
  220.         if search_mode:
  221.             search_status = f"Search: {search_text} ({match_idx}/{total_matches})"
  222.             stdscr.addstr(max_y-1, 0, search_status[:max_x-1], curses.A_REVERSE)
  223.         else:
  224.             stdscr.addstr(max_y-1, 0, " " * (max_x-1), curses.A_REVERSE)
  225.  
  226.     def handle_resize(signum, frame):
  227.         """Handle window resize signal."""
  228.         curses.resizeterm(*stdscr.getmaxyx())
  229.  
  230.     signal.signal(signal.SIGWINCH, handle_resize)
  231.  
  232.     # Check for bookmark, favorite, or last read position
  233.     config = load_config()
  234.     last_position = load_position()
  235.  
  236.     # Check if the file is the last read file
  237.     if last_position['last_file'] == file_path:
  238.         start_pos = last_position['position']
  239.         search_text = last_position.get('search_text', '')
  240.  
  241.     # Check if the file is in the favorites
  242.     bookmarks = [fav for fav in config.get('favorites', []) if fav['filename'] == file_path]
  243.     if bookmarks:
  244.         start_pos = bookmarks[0]['position']
  245.         search_text = bookmarks[0].get('search_text', '')
  246.  
  247.     while True:
  248.         try:
  249.             with open(file_path, 'rb') as f:
  250.                 content = f.read().decode('utf-8', errors='ignore')
  251.         except Exception as e:
  252.             return
  253.  
  254.         # Use pandoc to format the text
  255.         process = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'plain'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  256.         formatted_text, _ = process.communicate(input=content.encode())
  257.  
  258.         lines = formatted_text.decode().split('\n')
  259.         file_size_kb = os.path.getsize(file_path) / 1024
  260.         max_y, max_x = stdscr.getmaxyx()
  261.         pos = start_pos
  262.  
  263.         search_mode = bool(search_text)
  264.         matches = find_matches(lines, search_text) if search_mode else []
  265.         current_match = 0
  266.  
  267.         while True:
  268.             stdscr.clear()
  269.             # Status bar
  270.             percentage_read = (pos / len(lines)) * 100 if lines else 0
  271.             display_status(stdscr, file_path, file_size_kb, percentage_read, search_mode, search_text, current_match + 1, len(matches))
  272.  
  273.             for i, line in enumerate(lines[pos:pos+max_y-2]):
  274.                 line_display = line[:max_x-1]
  275.                 if search_mode and search_text.lower() in line.lower():
  276.                     start_idx = line.lower().find(search_text.lower())
  277.                     stdscr.addstr(i+1, 0, line_display[:start_idx])
  278.                     stdscr.addstr(i+1, start_idx, line_display[start_idx:start_idx+len(search_text)], curses.A_REVERSE)
  279.                     stdscr.addstr(i+1, start_idx+len(search_text), line_display[start_idx+len(search_text):])
  280.                 else:
  281.                     stdscr.addstr(i+1, 0, line_display)
  282.             stdscr.refresh()
  283.  
  284.             key = stdscr.getch()
  285.             if key == curses.KEY_DOWN and pos < len(lines) - max_y + 2:
  286.                 pos += 1
  287.                 if search_mode:
  288.                     # Update current match index based on position
  289.                     while current_match < len(matches) and matches[current_match] < pos:
  290.                         current_match += 1
  291.             elif key == curses.KEY_UP and pos > 0:
  292.                 pos -= 1
  293.                 if search_mode:
  294.                     # Update current match index based on position
  295.                     while current_match > 0 and matches[current_match - 1] >= pos:
  296.                         current_match -= 1
  297.             elif key == curses.KEY_NPAGE:  # Page Down
  298.                 pos = min(pos + max_y - 2, len(lines) - max_y + 2)
  299.                 if search_mode:
  300.                     # Update current match index based on position
  301.                     while current_match < len(matches) and matches[current_match] < pos:
  302.                         current_match += 1
  303.             elif key == curses.KEY_PPAGE:  # Page Up
  304.                 pos = max(pos - (max_y - 2), 0)
  305.                 if search_mode:
  306.                     # Update current match index based on position
  307.                     while current_match > 0 and matches[current_match - 1] >= pos:
  308.                         current_match -= 1
  309.             elif key == ord('s'):
  310.                 search_mode = True
  311.                 search_text = ""
  312.                 stdscr.addstr(max_y-1, 0, "Search: ", curses.A_REVERSE)
  313.                 curses.echo()
  314.                 search_text = stdscr.getstr(max_y-1, 8).decode('utf-8')
  315.                 curses.noecho()
  316.                 matches = find_matches(lines, search_text)
  317.                 current_match = 0
  318.                 if matches:
  319.                     pos = max(matches[current_match] - 2, 0)
  320.             elif key == ord('x'):
  321.                 search_mode = False
  322.                 search_text = ""
  323.             elif search_mode and key == curses.KEY_RIGHT and matches:
  324.                 if current_match < len(matches) - 1:
  325.                     current_match += 1
  326.                     pos = max(matches[current_match] - 2, 0)
  327.             elif search_mode and key == curses.KEY_LEFT and matches:
  328.                 if current_match > 0:
  329.                     current_match -= 1
  330.                     pos = max(matches[current_match] - 2, 0)
  331.             elif key in map(ord, '0123456789'):
  332.                 percent = int(chr(key)) * 10
  333.                 pos = min(int((percent / 100) * len(lines)), len(lines) - max_y + 2)
  334.                 if search_mode:
  335.                     # Update current match index based on position
  336.                     while current_match < len(matches) and matches[current_match] < pos:
  337.                         current_match += 1
  338.             elif key == ord('c'):
  339.                 copy_file_to_temp_copy(file_path)
  340.             elif key == ord('f'):
  341.                 add_to_favorites(file_path, pos, search_text)
  342.             elif key == ord('e'):
  343.                 stdscr = open_in_editor(file_path, pos + 1)
  344.                 break  # Exit to reload file after editing
  345.             elif key == ord('q'):
  346.                 save_position(file_path, pos, search_text)
  347.                 return
  348.  
  349.         # Reload the file content after editing
  350.         curses.curs_set(1)
  351.         stdscr.clear()
  352.         stdscr.refresh()
  353.         curses.curs_set(0)
  354.  
  355.  
  356.  
  357.  
  358. def display_favorites(stdscr):
  359.     """Display the list of favorite files."""
  360.     config = load_config()
  361.     favorites = config.get("favorites", [])
  362.    
  363.     if not favorites:
  364.         stdscr.addstr(0, 0, "No favorites saved.", curses.color_pair(1))
  365.         stdscr.refresh()
  366.         stdscr.getch()
  367.         return
  368.    
  369.     current_row = 0
  370.     top_row = 0
  371.     max_file_size_len = 10  # Set a constant width for the file size column
  372.    
  373.     while True:
  374.         stdscr.clear()
  375.         max_y, max_x = stdscr.getmaxyx()
  376.         stdscr.addstr(0, 0, "Favorites")
  377.         for idx, fav in enumerate(favorites[top_row:top_row + max_y - 2]):
  378.             row_idx = idx + 1
  379.             if row_idx >= max_y:
  380.                 break
  381.             if idx + top_row == current_row:
  382.                 stdscr.attron(curses.A_REVERSE)
  383.             file_size_kb = os.path.getsize(fav['filename']) / 1024
  384.             file_size_display = f"{file_size_kb:.2f} KB".ljust(max_file_size_len)
  385.             display_text = f"{file_size_display} | {os.path.basename(fav['filename'])} (Position: {fav['position']}, Search: {fav['search_text']})"
  386.             if len(display_text) > max_x - 1:
  387.                 display_text = display_text[:max_x - 4] + '...'
  388.             stdscr.addstr(row_idx, 0, display_text)
  389.             if idx + top_row == current_row:
  390.                 stdscr.attroff(curses.A_REVERSE)
  391.         stdscr.refresh()
  392.        
  393.         key = stdscr.getch()
  394.        
  395.         if key == curses.KEY_DOWN and current_row < len(favorites) - 1:
  396.             current_row += 1
  397.             if current_row >= top_row + max_y - 2:
  398.                 top_row += 1
  399.         elif key == curses.KEY_UP and current_row > 0:
  400.             current_row -= 1
  401.             if current_row < top_row:
  402.                 top_row -= 1
  403.         elif key == curses.KEY_RIGHT or key == ord('\n'):
  404.             if favorites:
  405.                 fav = favorites[current_row]
  406.                 display_file(stdscr, fav['filename'], fav['position'], fav['search_text'])
  407.         elif key == ord('q'):
  408.             break
  409.         elif key == curses.KEY_BACKSPACE:
  410.             if favorites:
  411.                 del favorites[current_row]
  412.                 config["favorites"] = favorites
  413.                 save_config(config)
  414.                 if current_row >= len(favorites):
  415.                     current_row = len(favorites) - 1
  416.                 if top_row > current_row:
  417.                     top_row = current_row
  418.  
  419.  
  420. def search_mode(stdscr, search_text=""):
  421.     """Search mode to handle user input and search results."""
  422.     curses.curs_set(0)
  423.     current_row = 0
  424.     top_row = 0
  425.     results = []
  426.     error_message = ""
  427.     sort_mode = 0  # 0: Name, 1: Size Ascending, 2: Size Descending, 3: Matches Ascending, 4: Matches Descending
  428.  
  429.     config = load_config()
  430.     files = list_files(config["SEARCH_DIR"])
  431.     results = list(search_files(files, search_text))
  432.     if not results:
  433.         error_message = "No matches found."
  434.     else:
  435.         error_message = ""
  436.  
  437.     base_path = config["SEARCH_DIR"]
  438.     max_file_size_len = 10  # Set a constant width for the file size column
  439.  
  440.     while True:
  441.         if sort_mode == 1:
  442.             results.sort(key=lambda x: os.path.getsize(x[0]))
  443.         elif sort_mode == 2:
  444.             results.sort(key=lambda x: os.path.getsize(x[0]), reverse=True)
  445.         elif sort_mode == 3:
  446.             results.sort(key=lambda x: (x[1], os.path.getsize(x[0])), reverse=False)
  447.         elif sort_mode == 4:
  448.             results.sort(key=lambda x: (x[1], os.path.getsize(x[0])), reverse=True)
  449.         else:
  450.             results.sort(key=lambda x: x[0])
  451.  
  452.         stdscr.clear()
  453.         sort_text = ["Name", "Size Ascending", "Size Descending", "Matches Ascending", "Matches Descending"][sort_mode]
  454.         status_text = f"Search: {search_text} | Sort by: {sort_text} | Result {current_row + 1}/{len(results)}"
  455.         stdscr.addstr(0, 0, status_text[:stdscr.getmaxyx()[1]-1])
  456.         if error_message:
  457.             stdscr.addstr(1, 0, f"Error: {error_message}", curses.color_pair(1))
  458.         if results:
  459.             max_y, max_x = stdscr.getmaxyx()
  460.             max_display_rows = max_y - 3
  461.             for idx, (result, count) in enumerate(results[top_row:top_row + max_display_rows]):
  462.                 row_idx = idx + 2
  463.                 if row_idx >= max_y:
  464.                     break
  465.                 if idx + top_row == current_row:
  466.                     stdscr.attron(curses.A_REVERSE)
  467.                 display_text = result.replace(base_path, "", 1)  # Remove the base path only once
  468.                 file_size_kb = os.path.getsize(result) / 1024
  469.                 count_display = f"{count:03}".replace(" ", ".")
  470.                 file_size_display = f"{file_size_kb:.2f} KB".ljust(max_file_size_len)
  471.                 display_text = f"{count_display} | {file_size_display} | {display_text}"
  472.                 if len(display_text) > max_x - 1:
  473.                     display_text = display_text[:max_x - 4] + '...'
  474.                 stdscr.addstr(row_idx, 0, display_text)
  475.                 if idx + top_row == current_row:
  476.                     stdscr.attroff(curses.A_REVERSE)
  477.         stdscr.refresh()
  478.  
  479.         key = stdscr.getch()
  480.  
  481.         if key == curses.KEY_DOWN and results and current_row < len(results) - 1:
  482.             current_row += 1
  483.             if current_row >= top_row + max_display_rows - 4:
  484.                 top_row = min(top_row + 1, len(results) - max_display_rows)
  485.         elif key == curses.KEY_UP and current_row > 0:
  486.             current_row -= 1
  487.             if current_row < top_row + 4:
  488.                 top_row = max(top_row - 1, 0)
  489.         elif key == curses.KEY_RIGHT or key == ord('\n'):
  490.             if results:
  491.                 display_file(stdscr, results[current_row][0], search_text=search_text)
  492.         elif key == ord('c'):
  493.             copy_file_to_temp_copy(results[current_row][0])
  494.         elif key == ord('t'):
  495.             sort_mode = (sort_mode + 1) % 5  # Update to cycle through 5 sorting modes
  496.         elif key == ord('f'):
  497.             display_favorites(stdscr)
  498.         elif key == ord('e'):
  499.             if results:
  500.                 stdscr = open_in_editor(results[current_row][0], 1)  # Open at the first line
  501.                 # Reload the search results after editing
  502.                 results = list(search_files(files, search_text))
  503.                 if sort_mode == 1:
  504.                     results.sort(key=lambda x: os.path.getsize(x[0]))
  505.                 elif sort_mode == 2:
  506.                     results.sort(key=lambda x: os.path.getsize(x[0]), reverse=True)
  507.                 elif sort_mode == 3:
  508.                     results.sort(key=lambda x: (x[1], os.path.getsize(x[0])), reverse=False)
  509.                 elif sort_mode == 4:
  510.                     results.sort(key=lambda x: (x[1], os.path.getsize(x[0])), reverse=True)
  511.                 else:
  512.                     results.sort(key=lambda x: x[0])
  513.         elif key == ord('q'):
  514.             break
  515.  
  516.  
  517. def resume_mode(stdscr):
  518.     """Resume mode to open the last viewed file at the last position."""
  519.     config = load_position()
  520.     last_file = config['last_file']
  521.     last_pos = config['position']
  522.     if last_file:
  523.         display_file(stdscr, last_file, last_pos)
  524.  
  525. if __name__ == '__main__':
  526.     save_default_config()  # Ensure default config is saved if it doesn't exist
  527.    
  528.     parser = argparse.ArgumentParser(description="Search and view text files.")
  529.     parser.add_argument('-l', '--last', action='store_true', help="Open the last viewed file at the last position.")
  530.     parser.add_argument('-f', '--favorites', action='store_true', help="Display the list of favorite files.")
  531.     parser.add_argument('search_text', nargs='?', type=str, help="The search term.")
  532.     args = parser.parse_args()
  533.  
  534.     if args.last:
  535.         curses.wrapper(resume_mode)
  536.     elif args.favorites:
  537.         curses.wrapper(display_favorites)
  538.     elif args.search_text:
  539.         curses.wrapper(search_mode, args.search_text)
  540.     else:
  541.         last_position = load_position()
  542.         if last_position["last_file"]:
  543.             curses.wrapper(display_file, last_position["last_file"], last_position["position"], last_position["search_text"])
  544.         else:
  545.             print("Usage: smutreader [-l] [-f] [search_text]")
  546.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement