# docs/remove-list-columns.py

import os
import re
import sys

def parse_meta_directives(lines):
    """Collect the column-removal settings declared in a file's lines.

    Each line is checked for three markers. When a marker occurs anywhere
    in a line, the text following the line's second colon is trimmed and
    stored under the matching key:

    * ``:remove-column-from-html-table:`` -> ``"remove_column"`` (str)
    * ``:remove-column-emptied-row:`` -> ``"remove_emptied_row"``
      (bool, True only when the value is exactly ``"1"``)
    * ``:docs-build-context:`` -> ``"context"`` (str)

    A later occurrence of a marker overwrites an earlier one. Keys whose
    marker never appears are absent from the returned dict.
    """
    # (marker text, result key, conversion applied to the trimmed value)
    rules = (
        (":remove-column-from-html-table:", "remove_column", str),
        (":remove-column-emptied-row:", "remove_emptied_row",
         lambda value: value == "1"),
        (":docs-build-context:", "context", str),
    )

    found = {}
    for text in lines:
        for marker, key, convert in rules:
            if marker in text:
                # The marker itself holds two colons, so index 2 always exists.
                found[key] = convert(text.split(":", 2)[2].strip())
    return found

def extract_table_blocks(lines):
    """Return every ``.. list-table::`` block found in *lines*.

    A block starts at a line whose stripped text begins with
    ``.. list-table::`` and continues over the following lines that are
    blank or start with at least two spaces. The first other line ends the
    block and is not part of it. Each block is a list of the original line
    strings, in order, so ``lines[i:i + len(block)] == block`` holds at the
    block's starting index ``i``.

    A new ``.. list-table::`` line met while a block is still open closes
    that block and opens the next one, so neither table is lost.
    """
    blocks = []
    current = None  # lines of the table being collected; None outside a table
    for line in lines:
        if line.strip().startswith(".. list-table::"):
            if current is not None:
                blocks.append(current)
            current = [line]
        elif current is None:
            # Outside a table nothing is collected. Blank lines in particular
            # must not be attached to a block that has already been closed.
            continue
        elif line.startswith("  ") or line.strip() == "":
            current.append(line)
        else:
            blocks.append(current)
            current = None
    if current is not None:
        blocks.append(current)
    return blocks

def split_table_row(row_lines):
    """Split the lines of one list-table row into a list of cells.

    *row_lines* is expected to start with the ``* - first cell`` line.
    Each cell is returned as a list of strings: the text after the cell
    marker, followed by one entry per continuation line. Every entry has
    its surrounding whitespace (including the newline) stripped, so a
    blank continuation line becomes ``""``.

    Only a ``-`` sitting at (or left of) the column of the row's first
    ``-`` opens a new cell. A more deeply indented ``- item`` line is a
    bullet list nested inside the current cell and is kept as content.
    """
    cells = []
    current_cell = []
    cell_column = None  # column of the '-' that opens this row's cells
    for line in row_lines:
        first = re.match(r'^(\s*\*\s+)-\s*', line)
        if first:
            # "* - text": remember where the cell marker sits.
            cell_column = len(first.group(1))
            current_cell = [line[first.end():].strip()]
            continue

        marker = re.match(r'^(\s*)-(?:\s+|$)', line)
        opens_cell = marker is not None and (
            cell_column is None or len(marker.group(1)) <= cell_column
        )
        if opens_cell:
            cells.append(current_cell)
            current_cell = [line[marker.end():].strip()]
        else:
            current_cell.append(line.strip())
    cells.append(current_cell)
    return cells

def join_cells(cells, base_indent):
    """Rebuild the text lines of one list-table row from its cells.

    *cells* is a list of cells, each a list of strings (first line of the
    cell followed by its continuation lines). The first cell is written
    after ``base_indent + "* - "``, every later cell after
    ``base_indent + "  - "``, and continuation lines after
    ``base_indent + "    "``.

    Returns a list of lines. No newline is appended here; a string that
    already ends in a newline keeps it. An empty *cells* list yields
    ``[]`` and an empty cell is written as a bare marker. Blank content
    is emitted without trailing whitespace.
    """
    def render(prefix, text):
        # Blank content: drop the padding instead of leaving trailing spaces.
        if not text.strip():
            return prefix.rstrip()
        return prefix + text

    lines = []
    for position, cell in enumerate(cells):
        marker = "* - " if position == 0 else "  - "
        content = cell if cell else [""]  # an empty cell still needs its marker
        lines.append(render(base_indent + marker, content[0]))
        for extra in content[1:]:
            lines.append(render(base_indent + "    ", extra))
    return lines

def process_table(table_lines, cols_to_remove_str, remove_empty_row=False):
    """Remove the named columns from one list-table block.

    Parameters:
        table_lines: the lines of the block, starting with the
            ``.. list-table::`` line.
        cols_to_remove_str: comma-separated header names. Names are
            trimmed and compared for exact equality with the flattened
            text of the first row's cells; empty names are ignored.
        remove_empty_row: when true, a row whose remaining cells are all
            blank after the removal is dropped.

    Returns:
        *table_lines* itself (unmodified) when the block has no rows or
        none of the names matches a header cell. Otherwise a new list:
        the lines preceding the first row, a blank separator if those
        lines do not already end with one, the rebuilt rows, and the
        block's trailing blank lines. Returned lines are not guaranteed
        to end with a newline.
    """
    # Stray commas ("A,,B" or a trailing comma) must not produce an empty
    # name that could match an empty header cell.
    cols_to_remove = [
        name.strip() for name in cols_to_remove_str.split(',') if name.strip()
    ]

    # Set the block's trailing blank lines aside. Left in place they are
    # parsed as content of the last cell and vanish with it when the last
    # column is removed.
    body = list(table_lines)
    trailing = []
    while body and body[-1].strip() == "":
        trailing.append(body.pop())
    trailing.reverse()

    preamble = []      # lines that belong to no row (directive, options, ...)
    table_rows = []    # one list of lines per row
    buffer = []        # lines of the row currently being collected
    indentation = ""   # whitespace in front of the most recent row's '*'

    for line in body:
        row_start = re.match(r'(\s*)\*\s+-', line)
        if row_start:
            indentation = row_start.group(1)
            if buffer:
                table_rows.append(buffer)
            buffer = [line]
        elif buffer and (line.strip() == ""
                         or re.match(r'^\s*(-|[^*].*)$', line)):
            buffer.append(line)
        else:
            if buffer:
                table_rows.append(buffer)
                buffer = []
            preamble.append(line)
    if buffer:
        table_rows.append(buffer)

    if not table_rows:
        return table_lines

    # The first row is the header. Look up each requested name there; the
    # set keeps a name listed twice from deleting two columns.
    header_cells = [' '.join(c).strip() for c in split_table_row(table_rows[0])]
    matched = {
        header_cells.index(name)
        for name in cols_to_remove
        if name in header_cells
    }
    if not matched:
        return table_lines  # nothing to remove: leave the block untouched

    # Delete from right to left so earlier deletions do not shift the
    # indices that are still pending.
    header_indices = sorted(matched, reverse=True)

    new_rows = []
    for row in table_rows:
        cells = split_table_row(row)
        for header_index in header_indices:
            if header_index < len(cells):
                del cells[header_index]

        if not cells:
            continue  # every column of this row was removed
        if remove_empty_row and all(not ''.join(cell).strip() for cell in cells):
            continue

        new_rows.append(join_cells(cells, indentation))

    result = list(preamble)
    if not result or result[-1].strip() != "":
        result.append("")  # blank line between the preamble and the rows
    for row in new_rows:
        result.extend(row)
    result.extend(trailing)
    return result

def process_file(path):
    """Apply the file's column-removal directives to its list-tables.

    The file at *path* is read as UTF-8 and rewritten in place. It is
    left untouched when it declares no ``remove_column`` directive, when
    it declares no build context, or when its declared context differs
    from the ``DOCS_BUILD_CONTEXT`` environment variable. In the latter
    two cases a message is printed.

    Every line written back ends with a newline.
    """
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    directives = parse_meta_directives(lines)
    if "remove_column" not in directives:
        return

    context = directives.get("context")
    if context is None:
        # Without a declared context there is nothing to compare against.
        # Skip instead of failing with KeyError; the file stays unmodified.
        print("No :docs-build-context: directive in", path, "- Skipping")
        return
    if context != os.environ.get('DOCS_BUILD_CONTEXT'):
        print("Not in", context, "- Skipping")
        return

    table_blocks = extract_table_blocks(lines)
    output_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        block = None
        if line.strip().startswith(".. list-table::"):
            # Find the extracted block that starts at this position.
            block = next(
                (b for b in table_blocks if b and lines[i:i + len(b)] == b),
                None,
            )
        if block is None:
            # Ordinary line, or a table start with no matching block: copy
            # it through. Always advancing keeps the loop from stalling.
            output_lines.append(line)
            i += 1
            continue

        output_lines.extend(
            process_table(
                block,
                directives["remove_column"],
                directives.get("remove_emptied_row", False),
            )
        )
        i += len(block)

    with open(path, 'w', encoding='utf-8') as f:
        # Rebuilt table lines may lack a newline; add it where missing.
        f.writelines(l + ("\n" if not l.endswith("\n") else "") for l in output_lines)

# Currently unused: the list of files is produced by grep and looped over
# outside this function.
def scan_dir(directory):
    """Run ``process_file`` on every ``.rst`` file found under *directory*.

    The tree is traversed with ``os.walk``; files are matched by the
    ``.rst`` name suffix.
    """
    for dirpath, _subdirs, filenames in os.walk(directory):
        rst_names = [entry for entry in filenames if entry.endswith(".rst")]
        for entry in rst_names:
            process_file(os.path.join(dirpath, entry))

if __name__ == "__main__":
    # Exactly one argument is expected: the path of the file to process.
    # It is handed to process_file, so it must be a file, not a directory.
    if len(sys.argv) != 2:
        print(f"Usage: python {sys.argv[0]} <rst-file>")
        sys.exit(1)
    process_file(sys.argv[1])