Advertisement
Guest User

CIS2Excel

a guest
Oct 16th, 2024
139
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 11.06 KB | Source Code | 0 0
  1. import re
  2. import os
  3. import sys
  4. import json
  5. import pandas as pd
  6. import tika
  7. from tika import parser
  8. from openpyxl import load_workbook
  9. from openpyxl.utils.dataframe import dataframe_to_rows
  10. from openpyxl.styles import Alignment, PatternFill
  11. from openpyxl.worksheet.table import Table, TableStyleInfo
  12. from openpyxl.formatting.rule import FormulaRule
  13.  
  14. # Initialize Tika VM
  15. tika.initVM()
  16.  
  17. def process_pdf(cispdf, outfile):
  18.     # JSON and Excel file names
  19.     cisjson = f"{outfile}.json"
  20.     cisexcel = f"{outfile}.xlsx"
  21.     cistext = 'cis_text.txt'
  22.  
  23.     # Convert PDF to text
  24.     print(f"[+] Converting '{cispdf}' to text...")
  25.     raw = parser.from_file(cispdf)
  26.     data = raw['content']
  27.  
  28.     # Create a temporary text file
  29.     print("[+] Creating temp text file...")
  30.     with open(cistext, 'w', encoding='utf-8') as f:
  31.         f.write(data)
  32.  
  33.     # Remove blank lines and write to temp file
  34.     with open(cistext, 'r', encoding='utf-8') as filer, open('temp.txt', 'w', encoding='utf-8') as filew:
  35.         for line in filer:
  36.             if not line.strip():
  37.                 continue
  38.             filew.write(line)
  39.  
  40.     # Initialize flags and variables
  41.     flagStart, flagDesc, flagAudit, flagRecom, flagComplete = False, False, False, False, False
  42.     cis_title, cis_desc, cis_audit, cis_recom = "", "", "", ""
  43.     listObj = []
  44.     start_processing = False  # Flag to indicate when to start processing lines
  45.  
  46.     # Convert text to JSON format
  47.     print("[+] Converting to JSON...")
  48.     with open("temp.txt", 'r', encoding='utf-8') as filer:
  49.         for line in filer:
  50.             # Check for the start marker "Appendix: Change History"
  51.             if "Appendix: Change History" in line:
  52.                 start_processing = True
  53.                 continue  # Skip the line containing the start marker
  54.  
  55.             if "CIS Benchmark Recommendation Set" in line:
  56.                 break
  57.  
  58.             # Skip processing until the start marker is found
  59.             if not start_processing:
  60.                 continue
  61.  
  62.             if not line.strip():
  63.                 continue
  64.             line = line.strip()
  65.  
  66.             x = {}  # JSON object
  67.  
  68.             # Check if line matches the CIS title format
  69.             if re.match(r"^[0-9]+\.[0-9]+(\.[0-9]+)* \((L[1-9]|BL)\) .*(?<!\d)$", line):
  70.                 # Split the line into parts based on spaces
  71.                 parts = line.split(' ', 2)  # Split into three parts: section, level, and the rest
  72.  
  73.                 # Extract section, level, and title
  74.                 section = parts[0]  # The first part is the section
  75.                 level = parts[1] if parts[1].startswith('(') else ""  # The second part is the level if it starts with '('
  76.                 title = parts[2] if len(parts) > 2 else ""  # The rest is the title
  77.  
  78.                 # Read and concatenate lines until "Profile Applicability:" is found
  79.                 while "Profile Applicability:" not in line:
  80.                     next_line = next(filer, '').strip()  # Read the next line
  81.                     if "Profile Applicability:" in next_line:
  82.                         break
  83.  
  84.                     title += " " + next_line
  85.  
  86.                 flagStart, flagDesc, flagAudit, flagRecom, flagComplete = True, False, False, False, False
  87.  
  88.             if flagStart:
  89.                 # Get description - capture everything between 'Description:' and 'Rationale:'
  90.                 if "Description:" in line:
  91.                     flagDesc = True
  92.                     continue  # Skip the "Description:" line itself
  93.  
  94.                 if flagDesc and "Rationale:" not in line:
  95.                     cis_desc += line
  96.  
  97.                 if "Rationale:" in line:
  98.                     flagDesc = False
  99.  
  100.                 # Get Audit - capture everything between 'Audit:' and 'Remediation:'
  101.                 if "Audit:" in line:
  102.                     flagAudit = True
  103.                     continue  # Skip the "Audit:" line itself
  104.  
  105.                 if flagAudit and "Remediation:" not in line:
  106.                     cis_audit += line
  107.  
  108.                 # Check for Remediation and start capturing
  109.                 if "Remediation:" in line:
  110.                     flagAudit = False
  111.                     flagRecom = True
  112.  
  113.                     # Start formatting Remediation section
  114.                     if "Remediation:" not in cis_recom:
  115.                         cis_recom += "\n\nRemediation:\n" + line.replace('Remediation:', '').strip()
  116.                     continue  # Continue processing the next lines for remediation
  117.  
  118.                 # Get Remediation - capture everything between 'Remediation:' and 'References:'
  119.                 if flagRecom:
  120.                     # Stop capturing if any of these markers are found
  121.                     if "References:" in line or "Additional Information:" in line or "CIS Controls:" in line:
  122.                         flagRecom = False
  123.                         flagComplete = True
  124.                     else:
  125.                         # Insert specific formatting and handle different sections
  126.                         if "Remediation:" in line:
  127.                             # Ensure Remediation section is correctly formatted
  128.                             if "Remediation:" not in cis_recom:
  129.                                 cis_recom += "\n\nRemediation:\n" + line.replace('Remediation:', '').strip()
  130.                             else:
  131.                                 cis_recom += " " + line.strip()  # Continue appending to Remediation section
  132.                         elif "Default Value:" in line:
  133.                             cis_recom += "\n\nDefault Value:\n" + line.replace('Default Value:', '').strip()
  134.                         elif "Additional Information:" in line:
  135.                             cis_recom += "\n\nAdditional Information:\n" + line.replace('Additional Information:', '').strip()
  136.                         else:
  137.                             # Add content normally, with a space to separate from previous content
  138.                             cis_recom += " " + line.strip()
  139.  
  140.  
  141.                 if flagComplete:
  142.                     # Clean up the extracted text
  143.                     title = title.replace('\n', ' ')
  144.                     cis_desc = cis_desc.replace('\n', ' ').replace('Rationale:', '').replace('| P a g e', '')
  145.                     cis_audit = cis_audit.replace('\n', ' ').replace('Remediation:', '').replace('| P a g e', '')
  146.                     # Clean up the extracted text before saving
  147.                     cis_recom = re.sub(r'\bPage \d+\b', '', cis_recom).strip()
  148.  
  149.                     x['section'] = section
  150.                     x['level'] = level
  151.                     x['title'] = title
  152.                     x['description'] = cis_desc
  153.                     x['audit'] = cis_audit
  154.                     x['recommendations'] = cis_recom
  155.  
  156.                     # Reset for the next section
  157.                     cis_title, cis_desc, cis_audit, cis_recom = "", "", "", ""
  158.                     flagStart, flagDesc, flagAudit, flagRecom, flagComplete = False, False, False, False, False
  159.                     listObj.append(x)
  160.  
  161.     print(f"[+] Writing to '{cisjson}' ...")
  162.     with open(cisjson, 'w') as json_file:
  163.         json.dump(listObj, json_file, indent=4, separators=(',', ': '))
  164.  
  165.     # Create Excel file from JSON data
  166.     print(f"[+] Creating '{cisexcel}' ...")
  167.     df_json = pd.read_json(cisjson)
  168.  
  169.     # Add the additional columns: Intune Policy and Exception
  170.     df_json.insert(3, 'Intune Policy', '')
  171.     df_json.insert(4, 'Exception', '')    
  172.  
  173.     # Add the 'Exception Reason' column to the end
  174.     df_json['Exception Reason'] = ''
  175.  
  176.     # Save initial Excel file
  177.     df_json.to_excel(cisexcel, index=False)
  178.  
  179.     # Load workbook and select the active sheet
  180.     wb = load_workbook(cisexcel)
  181.     ws = wb.active
  182.  
  183.     # Set column widths and apply word wrap
  184.     column_widths = {
  185.         'C': 45,  # Title column
  186.         'D': 30,  # Intune Policy column width (adjust as needed)
  187.         'E': 15,  # Exception column width (adjust as needed)
  188.         'F': 74,  # Description column
  189.         'G': 74,  # Audit column
  190.         'H': 74,  # Recommendations column
  191.         'I': 70,  # Exception Reason column width (adjust as needed)
  192.     }
  193.  
  194.     # Apply formatting for each column
  195.     for col in ws.columns:
  196.         column = col[0].column_letter  # Get the column letter
  197.  
  198.         for cell in col:
  199.             # Apply word wrap for relevant columns
  200.             if column in ['A', 'B', 'C', 'D', 'E']:  # Columns A to E to vertically middle align
  201.                 cell.alignment = Alignment(wrap_text=True, vertical='center')
  202.  
  203.             if column in ['F', 'G', 'H']:  # Columns to word wrap
  204.                 cell.alignment = Alignment(wrap_text=True)
  205.  
  206.             # Set the column width
  207.             if column in column_widths:
  208.                 ws.column_dimensions[column].width = column_widths[column]
  209.  
  210.     # Define table range and create a Table object
  211.     table_range = f"A1:{ws.cell(row=ws.max_row, column=ws.max_column).coordinate}"
  212.     table = Table(displayName="IntunePolicyTable", ref=table_range)
  213.  
  214.     # Apply table style
  215.     style = TableStyleInfo(
  216.         name="TableStyleLight9",  # Blue, Table Style Medium 9
  217.         showFirstColumn=False,
  218.         showLastColumn=False,
  219.         showRowStripes=True,
  220.         showColumnStripes=True
  221.     )
  222.     table.tableStyleInfo = style
  223.  
  224.     # Add the table to the worksheet
  225.     ws.add_table(table)
  226.  
  227.     # Define colors for conditional formatting
  228.     green_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")  # Light green
  229.     red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")   # Light red
  230.  
  231.     # Apply conditional formatting: If there is a value in "Intune Policy" (Column D), fill A-C with green
  232.     ws.conditional_formatting.add(
  233.         'A2:C{}'.format(ws.max_row),
  234.         FormulaRule(
  235.             formula=['NOT(ISBLANK($D2))'],  # Reference to the D column to check for values
  236.             stopIfTrue=True,
  237.             fill=green_fill
  238.         )
  239.     )
  240.  
  241.     # Apply conditional formatting: If there is a value in "Exception" (Column E), fill A-C with red
  242.     ws.conditional_formatting.add(
  243.         'A2:C{}'.format(ws.max_row),
  244.         FormulaRule(
  245.             formula=['NOT(ISBLANK($E2))'],  # Reference to the E column to check for values
  246.             stopIfTrue=True,
  247.             fill=red_fill
  248.         )
  249.     )
  250.  
  251.     # Freeze the first three columns
  252.     ws.freeze_panes = 'D1'
  253.  
  254.     # Save the updated Excel file
  255.     wb.save(cisexcel)
  256.     print("[+] Done!")
  257.  
  258. def main():
  259.     if len(sys.argv) < 2:
  260.         print("[!] Please provide the input folder name!")
  261.         print("Usage: python {} <input_folder>\n".format(sys.argv[0]))
  262.         exit()
  263.  
  264.     input_folder = sys.argv[1]
  265.  
  266.     # Iterate over all files in the folder
  267.     for filename in os.listdir(input_folder):
  268.         if filename.startswith("CIS_") and filename.endswith(".pdf"):
  269.             cispdf = os.path.join(input_folder, filename)
  270.             outfile = os.path.join(input_folder, os.path.splitext(filename)[0])
  271.             process_pdf(cispdf, outfile)
  272.  
  273. if __name__ == "__main__":
  274.     main()
  275.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement