# docs/remove-list-columns.py

import os
import re
import sys

def parse_meta_directives(lines):
    """Collect the column-removal settings declared in a file's lines.

    Every line is checked for three markers; when a marker is present, the
    text after the line's second colon (whitespace-stripped) is stored:

    * ``:remove-column-from-html-table:`` -> ``"remove_column"`` (str)
    * ``:remove-column-emptied-row:``     -> ``"remove_emptied_row"``
      (bool, True only when the value is exactly ``"1"``)
    * ``:docs-build-context:``            -> ``"context"`` (str)

    A later occurrence of a marker overwrites an earlier one. Keys whose
    marker never appears are absent from the returned dict.
    """
    def value_after_second_colon(text):
        return text.split(":", 2)[2].strip()

    def as_is(value):
        return value

    def is_one(value):
        return value == "1"

    # (marker searched for, key stored under, value conversion)
    rules = (
        (":remove-column-from-html-table:", "remove_column", as_is),
        (":remove-column-emptied-row:", "remove_emptied_row", is_one),
        (":docs-build-context:", "context", as_is),
    )

    found = {}
    for text in lines:
        for marker, key, convert in rules:
            if marker in text:
                found[key] = convert(value_after_second_colon(text))
    return found

def extract_table_blocks(lines):
    """Return every ``.. list-table::`` block found in ``lines``.

    A block starts at a line whose stripped text begins with
    ``.. list-table::`` and continues over every following line that is
    blank or starts with at least two spaces. Each block is returned as a
    list of the original lines (newlines intact), in file order.

    Args:
        lines: The file content as a list of lines.

    Returns:
        A list of blocks, each a list of consecutive lines from ``lines``.
    """
    blocks = []
    current = []
    inside = False
    for line in lines:
        if line.strip().startswith(".. list-table::"):
            # A new directive while one is still open (e.g. two tables
            # separated only by blank lines) must not discard the open one.
            if inside:
                blocks.append(current)
            inside = True
            current = [line]
        # Parenthesised on purpose: without the grouping, blank lines seen
        # *outside* a table were appended to the previously stored block.
        elif inside and (line.startswith("  ") or line.strip() == ""):
            current.append(line)
        elif inside:
            blocks.append(current)
            inside = False
    if inside:
        blocks.append(current)
    return blocks

def split_table_row(row_lines):
    """Break the lines of one list-table row into per-cell line lists.

    A line opening with ``* -`` begins the row's first cell and a line
    opening with ``- `` begins each following cell; in both cases only the
    text after the marker is kept. Every other line is treated as a
    continuation of the cell in progress and is stored untouched, leading
    whitespace included.

    Returns a list of cells, each a list of strings.
    """
    cells = []
    pending = []
    for text in row_lines:
        if re.match(r'^\s*\*\s+-', text):
            # Row opener: restart with the text that follows "* - ".
            pending = [re.sub(r'^\s*\*\s+-\s*', '', text, count=1)]
            continue
        if re.match(r'^\s*-\s+', text):
            # Cell separator: close the running cell, open the next one
            # with the text that follows "- ".
            cells.append(pending)
            pending = [re.sub(r'^\s*-\s+', '', text, count=1)]
            continue
        # Continuation line: kept verbatim.
        pending.append(text)
    cells.append(pending)
    return cells

def join_cells(cells, base_indent):
    """Reconstruct the lines of a list-table row from its cells.

    The first cell is emitted as ``<base_indent>* - <text>`` and every other
    cell as ``<base_indent>  - <text>``. Continuation lines are aligned to
    the cell's content column (``len(base_indent) + 4``); any indentation a
    line had beyond that column is preserved, which keeps nested content
    such as directive option lines correctly indented. Whitespace-only
    lines become empty strings.

    Args:
        cells: List of cells, each a list of lines (first line is the text
            that followed the cell marker). An empty list yields no output.
        base_indent: Whitespace that precedes the row's ``*`` marker.

    Returns:
        The row's lines, without trailing newlines.
    """
    # A row whose every column was removed has nothing to render.
    if not cells:
        return []

    # Content starts 4 characters after the indent for both marker forms
    # ("* - " and "  - ").
    content_col_len = len(base_indent) + 4

    def realign(line):
        """Re-indent one continuation line relative to the content column."""
        if line.strip() == "":
            return ""  # preserve blank lines
        text = line.rstrip('\n')
        body = text.lstrip(' \t')
        leading = text[:len(text) - len(body)]
        # Tabs aren't expected in RST, but count each as 4 spaces if present.
        # They are stripped above so they are not applied a second time.
        lead = sum(4 if ch == '\t' else 1 for ch in leading)
        extra = max(0, lead - content_col_len)
        return " " * (content_col_len + extra) + body

    out = []
    for position, cell in enumerate(cells):
        marker = "* - " if position == 0 else "  - "
        first_line_text = cell[0] if cell else ""
        out.append(f"{base_indent}{marker}{first_line_text.rstrip()}")
        out.extend(realign(line) for line in cell[1:])
    return out


def process_table(table_lines, cols_to_remove_str, remove_empty_row=False):
    """Remove the named columns from one list-table block.

    The first row is treated as the header; a column is removed when its
    header text (cell lines joined with spaces, stripped) equals one of the
    requested names. Only the first header cell matching a given name is
    used.

    Args:
        table_lines: Lines of the table block, starting at the directive.
        cols_to_remove_str: Comma-separated header names to remove.
        remove_empty_row: When true, rows whose remaining cells are all
            blank are dropped from the output.

    Returns:
        ``table_lines`` unchanged when the block has no rows or none of the
        names match a header cell. Otherwise the non-row lines (as given),
        an empty string, and then the rebuilt row lines without trailing
        newlines.
    """
    cols_to_remove = [col.strip() for col in cols_to_remove_str.split(',')]

    row_start = re.compile(r'(\s*)\*\s+-')
    row_continuation = re.compile(r'^\s*(-|[^*].*)$')

    processed = []   # lines that are not part of any row (directive, options)
    table_rows = []  # one list of lines per row
    buffer = []      # lines of the row currently being collected
    indentation = ""

    for line in table_lines:
        marker = row_start.match(line)
        if marker:
            # The indentation of the last row marker seen is used for every
            # rebuilt row.
            indentation = marker.group(1)
            if buffer:
                table_rows.append(buffer)
            buffer = [line]
        elif buffer and (line.strip() == "" or row_continuation.match(line)):
            buffer.append(line)
        else:
            if buffer:
                table_rows.append(buffer)
                buffer = []
            processed.append(line)
    if buffer:
        table_rows.append(buffer)

    if not table_rows:
        return table_lines  # nothing that looks like a row

    header_cells = [' '.join(cell).strip() for cell in split_table_row(table_rows[0])]

    # A set de-duplicates indices: a name listed twice would otherwise delete
    # the same position twice and take an unrelated column with it.
    # Descending order lets us delete right-to-left without shifting indices.
    header_indices = sorted(
        {header_cells.index(name) for name in cols_to_remove if name in header_cells},
        reverse=True,
    )
    if not header_indices:
        return table_lines  # no requested column present; don't modify

    new_rows = []
    for row in table_rows:
        cells = split_table_row(row)
        for header_index in header_indices:
            if header_index < len(cells):
                del cells[header_index]

        # A row left without any cell cannot be rendered as a list-table row.
        if not cells:
            continue
        if remove_empty_row and all(not ''.join(cell).strip() for cell in cells):
            continue

        new_rows.append(join_cells(cells, indentation))

    return processed + [""] + [line for row in new_rows for line in row]

def process_file(path):
    """Rewrite ``path`` in place with the requested table columns removed.

    The file is left untouched when it has no
    ``:remove-column-from-html-table:`` directive, when it has no
    ``:docs-build-context:`` directive, or when that context differs from
    the ``DOCS_BUILD_CONTEXT`` environment variable. Otherwise every
    ``.. list-table::`` block is passed through ``process_table`` and the
    result is written back as UTF-8, one line per entry, each terminated by
    a newline.

    Args:
        path: Path of the file to process.
    """
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    directives = parse_meta_directives(lines)
    if "remove_column" not in directives:
        return
    context = directives.get("context")
    if context is None:
        # Previously a KeyError; skipping keeps the file untouched as before
        # but without a traceback.
        print("No :docs-build-context: directive - Skipping")
        return
    if context != os.environ.get('DOCS_BUILD_CONTEXT'):
        print("Not in", context, "- Skipping")
        return

    table_blocks = extract_table_blocks(lines)
    output_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        block = None
        if line.strip().startswith(".. list-table::"):
            # Locate the extracted block that begins at this line.
            block = next(
                (b for b in table_blocks if lines[i:i + len(b)] == b),
                None,
            )
        if block is None:
            # Ordinary line, or a directive line with no matching block:
            # copy it through so the loop always advances.
            output_lines.append(line)
            i += 1
            continue
        output_lines.extend(
            process_table(
                block,
                directives["remove_column"],
                directives.get("remove_emptied_row", False),
            )
        )
        i += len(block)

    with open(path, 'w', encoding='utf-8') as f:
        # Rebuilt table lines carry no newline; original lines do.
        f.writelines(l if l.endswith("\n") else l + "\n" for l in output_lines)

# Not used currently: the list of files comes from grep and is looped over
# outside this script.
def scan_dir(directory):
    """Run ``process_file`` on every ``.rst`` file found under ``directory``.

    The tree is walked with ``os.walk``; files are handled in the order the
    walk yields them.
    """
    for root, _subdirs, files in os.walk(directory):
        rst_paths = [os.path.join(root, name) for name in files if name.endswith(".rst")]
        for rst_path in rst_paths:
            process_file(rst_path)

if __name__ == "__main__":
    # Exactly one argument is expected: the path of the single file to
    # rewrite (it is passed to process_file, not scan_dir).
    if len(sys.argv) != 2:
        print("Usage: python remove-list-columns.py <file.rst>")
        sys.exit(1)
    process_file(sys.argv[1])