Files
docs/remove-grid-columns.py
Ron Stone 729f31198c Reimplement table col/row conditionalization (r10)
Reimplement table column and row removal to be output format agnostic.

Change-Id: I4822d53d37fd4604bf45c4bc4a315c8fc904376a
Signed-off-by: Ron Stone <ronald.stone@windriver.com>
2025-06-05 13:38:20 +00:00

477 lines
17 KiB
Python

#!/usr/bin/env python3
"""
Script to remove columns from reStructuredText grid tables based on column names.
Only processes grid tables that are declared using the .. table:: directive.
Column names to remove are specified using field lists in the RST file itself.
Usage:
python remove-grid-columns.py input.rst -o output.rst
python remove-grid-columns.py input.rst --output output.rst
If no output file is specified, the result is printed to stdout.
The RST file should contain field list entries specifying columns to remove:
:remove-column-from-html-table: Column1, Column2, Column3
Note: Only grid tables declared with the .. table:: directive will be modified.
Standalone grid tables without the directive will be left unchanged.
"""
import argparse
import sys
import re
import os
from typing import List, Tuple, Optional
def parse_grid_table(lines: List[str], start_idx: int) -> Tuple[List[List[List[str]]], List[int], int]:
    """
    Read one reStructuredText grid table out of ``lines``.

    Scanning begins at ``start_idx`` and moves forward to the first line whose
    stripped text starts with ``+``. Every following line that starts with
    ``+`` or ``|`` (after stripping) belongs to the table.

    Returns:
    - table_data: one entry per row; each row is a list of cells and each
      cell is a list of text lines (right-stripped, left padding preserved)
    - col_widths: distance between consecutive ``+`` marks of the first
      border line, minus the marks themselves
    - end_idx: index of the first line after the table

    When no border line is found the result is ``([], [], start_idx)``; when
    the first border has fewer than two ``+`` marks it is ``([], [], end_idx)``.
    """
    total = len(lines)
    cursor = start_idx

    # Advance to the opening border.
    while cursor < total and not lines[cursor].strip().startswith('+'):
        cursor += 1
    if cursor >= total:
        return [], [], start_idx

    # Gather the contiguous run of border and data lines.
    table_lines = []
    while cursor < total and lines[cursor].strip().startswith(('+', '|')):
        table_lines.append(lines[cursor])
        cursor += 1
    if not table_lines:
        return [], [], start_idx

    # The '+' marks of the first border define where every column starts/ends.
    plus_columns = [pos for pos, char in enumerate(table_lines[0]) if char == '+']
    if len(plus_columns) < 2:
        return [], [], cursor

    spans = list(zip(plus_columns, plus_columns[1:]))
    col_widths = [right - left - 1 for left, right in spans]

    table_data = []
    pending = None  # row currently being accumulated, if any
    for raw in table_lines:
        marker = raw.strip()[:1]
        if marker == '+':
            # A border closes whatever row was open.
            if pending is not None:
                table_data.append(pending)
                pending = None
        elif marker == '|':
            if pending is None:
                pending = [[] for _ in spans]
            # Slice each cell by position; keep indentation, drop trailing pad.
            for cell, (left, right) in zip(pending, spans):
                cell.append(raw[left + 1:right].rstrip())

    # A table that does not end on a border still yields its last row.
    if pending is not None:
        table_data.append(pending)

    return table_data, col_widths, cursor
def find_column_removal_directives(lines: List[str]) -> List[str]:
    """
    Collect the column names listed in column-removal field entries.

    Every line whose stripped text starts with
    ``:remove-column-from-html-table:`` contributes the comma-separated names
    that follow the marker. Names are stripped and blank names are dropped.
    Entries are returned in document order, duplicates included.
    """
    marker = ':remove-column-from-html-table:'
    found = []
    for raw in lines:
        text = raw.strip()
        if not text.startswith(marker):
            continue
        for piece in text[len(marker):].split(','):
            name = piece.strip()
            if name:
                found.append(name)
    return found
def check_docs_build_context(lines: List[str]) -> None:
    """
    Compare the document's ``:docs-build-context:`` value with the environment.

    The first ``:docs-build-context:`` entry that carries a non-empty value is
    compared with the ``DOCS_BUILD_CONTEXT`` environment variable. On a
    mismatch a message is written to stderr and the process exits with
    status 0. On a match a confirmation is written to stderr. Documents
    without such an entry are accepted silently.
    """
    marker = ':docs-build-context:'
    values = (
        text[len(marker):].strip()
        for text in (raw.strip() for raw in lines)
        if text.startswith(marker)
    )
    docs_context = next((value for value in values if value), None)
    if docs_context is None:
        return

    env_context = os.environ.get('DOCS_BUILD_CONTEXT')
    if env_context != docs_context:
        print(f"Docs-build-context '{docs_context}' does not match DOCS_BUILD_CONTEXT environment variable '{env_context}'. Skipping.", file=sys.stderr)
        sys.exit(0)
    print(f"docs-build-context '{docs_context}' matches environment variable", file=sys.stderr)
def should_remove_emptied_rows(lines: List[str]) -> bool:
    """
    Report whether rows emptied by column removal should be dropped.

    Only the first ``:remove-column-emptied-row:`` entry is consulted; the
    answer is True exactly when its value is ``1``. Without such an entry the
    answer is False.
    """
    marker = ':remove-column-emptied-row:'
    for text in map(str.strip, lines):
        if text.startswith(marker):
            return text[len(marker):].strip() == '1'
    return False
def find_first_table_position(lines: List[str]) -> int:
    """
    Locate the first grid table that is declared through a table directive.

    A candidate is any line that starts with ``+`` (after stripping) and
    contains ``-``; it counts only if ``find_table_directive`` accepts it.
    Returns the candidate's index, or ``len(lines)`` when none qualifies.
    """
    for position, text in enumerate(lines):
        looks_like_border = text.strip().startswith('+') and '-' in text
        # Only border-like lines are worth the backwards directive lookup.
        if looks_like_border and find_table_directive(lines, position)[0]:
            return position
    return len(lines)
def find_column_indices(headers: List[List[str]], columns_to_remove: List[str]) -> List[int]:
    """
    Map requested column names to their positions in the header row.

    Args:
        headers: header cells, each a list of text lines.
        columns_to_remove: column names as written in the removal directive.

    Returns:
        Unique column indices in descending order, so they can be removed one
        after another without shifting the remaining positions.

    Matching is attempted in three steps, the first hit winning:
      1. exact match against the stripped header text,
      2. case-insensitive match,
      3. case-insensitive match with runs of whitespace (including the line
         breaks of a header wrapped over several cell lines) collapsed to a
         single space.
    Step 3 exists because a directive value is a single line, so a wrapped
    header could otherwise never be selected. A warning is written to stderr
    for every requested name that matches no header.
    """
    # Headers are lists of lines; join them into one string per column.
    raw_headers = ['\n'.join(cell).strip() for cell in headers]
    folded_headers = [text.lower() for text in raw_headers]
    normalized_headers = [' '.join(text.split()).lower() for text in raw_headers]

    matched = set()
    for col_name in columns_to_remove:
        folded_name = col_name.lower()
        normalized_name = ' '.join(col_name.split()).lower()
        if col_name in raw_headers:
            matched.add(raw_headers.index(col_name))
        elif folded_name in folded_headers:
            matched.add(folded_headers.index(folded_name))
        elif normalized_name in normalized_headers:
            matched.add(normalized_headers.index(normalized_name))
        else:
            print(f"Warning: Column '{col_name}' not found in table headers", file=sys.stderr)

    return sorted(matched, reverse=True)
def remove_columns_and_empty_rows(table_data: List[List[List[str]]], col_indices: List[int], remove_empty_rows: bool = False) -> List[List[List[str]]]:
    """
    Return a copy of the table without the given columns.

    Args:
        table_data: rows of cells, each cell being a list of text lines.
        col_indices: zero-based positions of the columns to drop. Order and
            duplicates do not matter; positions beyond a row's length are
            ignored for that row.
        remove_empty_rows: when True, rows whose remaining cells contain only
            whitespace are left out of the result.

    Returns:
        A new list of new row lists; the input rows are not modified (the
        cell lists themselves are shared with the input).

    Columns are selected by membership rather than popped one by one, so the
    result does not depend on the caller passing the indices in descending
    order.
    """
    doomed = set(col_indices)
    new_table = []
    for row in table_data:
        new_row = [cell for idx, cell in enumerate(row) if idx not in doomed]
        # A row counts as empty when no surviving cell has visible text.
        if remove_empty_rows and not any('\n'.join(cell).strip() for cell in new_row):
            continue
        new_table.append(new_row)
    return new_table
def calculate_column_widths(table_data: List[List[List[str]]]) -> List[int]:
    """
    Work out the narrowest width that fits every column's content.

    The number of columns is taken from the first row; cells beyond that
    count in later rows are ignored. A column's width is the length of the
    longest right-stripped line found in any of its cells. An empty table
    yields an empty list.
    """
    if not table_data:
        return []

    widths = [0] * len(table_data[0])
    for row in table_data:
        for col, cell in enumerate(row[:len(widths)]):
            longest = max((len(text.rstrip()) for text in cell), default=0)
            if longest > widths[col]:
                widths[col] = longest
    return widths
def generate_border_line(col_widths: List[int], indent: str = '') -> str:
    """
    Build a horizontal border such as ``+----+---+``.

    Each column contributes ``width`` dashes followed by ``+``; the result is
    prefixed with ``indent``.
    """
    return indent + '+' + ''.join('-' * width + '+' for width in col_widths)
def generate_data_lines(row: List[List[str]], col_widths: List[int], indent: str = '') -> List[str]:
    """
    Render one table row as ``|``-delimited text lines.

    The row is as tall as its tallest cell; shorter cells are filled with
    blanks. Each cell line is right-stripped and then left-justified to the
    column width (text longer than the width is kept in full). Cells that
    have no corresponding width are not rendered. Every line is prefixed with
    ``indent``.
    """
    height = max((len(cell) for cell in row), default=0)
    # Pair each cell with its width; cells without a width drop out here.
    columns = list(zip(row, col_widths))

    rendered = []
    for line_no in range(height):
        pieces = [indent, '|']
        for cell, width in columns:
            text = cell[line_no].rstrip() if line_no < len(cell) else ''
            pieces.append(text.ljust(width) + '|')
        rendered.append(''.join(pieces))
    return rendered
def rebuild_table(table_data: List[List[List[str]]], indent: str = '') -> List[str]:
    """
    Turn parsed table data back into grid-table text lines.

    Column widths are recomputed from the content, and the same dashed border
    is emitted above the first row and after every row. All lines carry
    ``indent``. An empty table produces no lines.
    """
    if not table_data:
        return []

    widths = calculate_column_widths(table_data)
    separator = generate_border_line(widths, indent)

    rebuilt = [separator]
    for row in table_data:
        rebuilt += generate_data_lines(row, widths, indent)
        rebuilt.append(separator)
    return rebuilt
def find_table_directive(lines: List[str], start_idx: int) -> Tuple[bool, int, Optional[str], str]:
    """
    Check whether the grid table at ``start_idx`` is declared by ``.. table::``.

    Looking backwards from the table and skipping blank lines, the first
    non-empty line must be either the ``.. table::`` directive itself or one
    of the recognised option lines (``:header-rows`` / ``:widths``). In the
    option case the search continues upwards over the contiguous block of
    ``:``-prefixed option lines to reach the directive line, so the title and
    start index describe the directive rather than the option.

    Returns:
    - is_table_directive: True if this grid table is declared with .. table::
    - directive_start: index of the ``.. table::`` line; if a recognised
      option line is present but no directive line sits above it, the index
      of that option line
    - table_title: title text following ``.. table::``, or None
    - table_indent: leading spaces/tabs of the table's first line

    When the table is not accepted the result is ``(False, -1, None, '')``.
    """
    directive = '.. table::'

    # Leading whitespace of the table's first border line.
    table_line = lines[start_idx]
    table_indent = table_line[:len(table_line) - len(table_line.lstrip(' \t'))]

    # Skip blank lines immediately above the table.
    i = start_idx - 1
    while i >= 0 and lines[i].strip() == '':
        i -= 1
    if i < 0:
        return False, -1, None, ''

    stripped_line = lines[i].strip()
    if stripped_line.startswith(directive):
        table_title = stripped_line[len(directive):].strip() or None
        return True, i, table_title, table_indent

    if stripped_line.startswith((':header-rows', ':widths')):
        # Options follow the directive without blank lines in between, so
        # walk up through the option block only.
        j = i - 1
        while j >= 0 and lines[j].strip().startswith(':'):
            j -= 1
        if j >= 0 and lines[j].strip().startswith(directive):
            table_title = lines[j].strip()[len(directive):].strip() or None
            return True, j, table_title, table_indent
        # Option line without a visible directive: still accepted, but there
        # is no title to report.
        return True, i, None, table_indent

    # First non-empty line is neither the directive nor a recognised option.
    return False, -1, None, ''
def _count_header_rows(table_lines: List[str]) -> int:
    """Return how many rows precede the ``+====+`` header separator (0 if there is none)."""
    borders_seen = 0
    for raw in table_lines:
        text = raw.strip()
        if not text.startswith('+'):
            continue
        # The top border can never be the header separator.
        if borders_seen and re.fullmatch(r'\+(?:=+\+)+', text):
            return borders_seen
        borders_seen += 1
    return 0


def _restore_header_separator(table_lines: List[str], header_rows: int) -> None:
    """Rewrite, in place, the border that follows ``header_rows`` rows using ``=``."""
    borders_seen = 0
    for idx, text in enumerate(table_lines):
        if not text.lstrip().startswith('+'):
            continue
        if borders_seen == header_rows:
            table_lines[idx] = text.replace('-', '=')
            return
        borders_seen += 1


def process_rst_file(content: str, columns_to_remove: List[str], remove_empty_rows: bool = False) -> str:
    """
    Remove the named columns from every directive-declared grid table.

    Args:
        content: full RST document text.
        columns_to_remove: header names of the columns to drop.
        remove_empty_rows: also drop rows left without visible content.

    Returns:
        The document text with matching tables rebuilt. Tables that are not
        accepted by ``find_table_directive``, tables that cannot be parsed,
        and tables containing none of the named columns are copied through
        unchanged, as is all other text.

    The first row of a table is treated as the header row for name matching.
    If the original table has a ``+====+`` header separator, the rebuilt
    table gets one after the same header rows, provided both header and body
    rows remain. A summary line is written to stderr for each rebuilt table.
    """
    lines = content.split('\n')
    result_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]

        # Candidate start of a grid table: a border line containing dashes.
        if line.strip().startswith('+') and '-' in line:
            is_directive_table, _, table_title, table_indent = find_table_directive(lines, i)
            if is_directive_table:
                table_data, _, end_idx = parse_grid_table(lines, i)
                if table_data:
                    # Assume first row contains headers
                    headers = table_data[0]
                    col_indices = find_column_indices(headers, columns_to_remove)

                    if col_indices:
                        # Header and body are filtered separately so that the
                        # number of surviving header rows is known afterwards.
                        header_rows = _count_header_rows(lines[i:end_idx])
                        kept_header = remove_columns_and_empty_rows(table_data[:header_rows], col_indices, remove_empty_rows)
                        kept_body = remove_columns_and_empty_rows(table_data[header_rows:], col_indices, remove_empty_rows)
                        new_table_lines = rebuild_table(kept_header + kept_body, table_indent)
                        # A separator is only valid with rows on both sides.
                        if kept_header and kept_body:
                            _restore_header_separator(new_table_lines, len(kept_header))
                        result_lines.extend(new_table_lines)

                        title_info = f" ('{table_title}')" if table_title else ""
                        removed_cols_info = ['\n'.join(headers[idx]).strip() for idx in sorted(col_indices)]
                        empty_rows_info = " and empty rows" if remove_empty_rows else ""
                        print(f"Processed table{title_info}: removed columns {removed_cols_info}{empty_rows_info}", file=sys.stderr)
                    else:
                        # No columns to remove, keep original table
                        result_lines.extend(lines[i:end_idx])

                    # Everything before the table is already in result_lines;
                    # output length must not be used as an input index since
                    # a rebuilt table may have fewer lines than the original.
                    i = end_idx
                    continue

        # Regular line, undeclared table, or unparsable table: copy through.
        result_lines.append(line)
        i += 1

    return '\n'.join(result_lines)
def main():
    """
    Command-line entry point.

    Reads the RST input (a file, or stdin when the argument is ``-``), checks
    the docs-build-context, gathers the removal directives from the document
    itself, processes the tables, and writes the result to the ``--output``
    file or to stdout. Read, processing and write failures are reported on
    stderr and end the process with status 1.
    """
    usage_examples = """
Examples:
%(prog)s input.rst -o output.rst
%(prog)s input.rst --output output.rst
cat input.rst | %(prog)s - > output.rst
The RST file should contain field list entries specifying columns to remove:
:remove-column-from-html-table: Column1, Column2, Column3
Note: Only processes grid tables declared with the .. table:: directive.
Standalone grid tables will be left unchanged.
"""
    parser = argparse.ArgumentParser(
        description='Remove columns from reStructuredText grid tables declared with .. table:: directive',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    parser.add_argument('input', help='Input RST file (use "-" for stdin)')
    parser.add_argument('-o', '--output', help='Output file (default: stdout)')
    args = parser.parse_args()

    # Load the document text.
    if args.input == '-':
        content = sys.stdin.read()
    else:
        try:
            with open(args.input, 'r', encoding='utf-8') as source:
                content = source.read()
        except IOError as e:
            print(f"Error reading input file: {e}", file=sys.stderr)
            sys.exit(1)

    lines = content.split('\n')

    # May terminate the process when the document targets another context.
    check_docs_build_context(lines)

    # The document itself says which columns go and whether emptied rows go.
    columns_to_remove = find_column_removal_directives(lines)
    remove_empty_rows = should_remove_emptied_rows(lines)

    if not columns_to_remove:
        print("No column removal directives found in the file.", file=sys.stderr)
        print("Looking for: :remove-column-from-html-table: Column1, Column2", file=sys.stderr)
    if remove_empty_rows:
        print("Will remove rows that become empty after column removal.", file=sys.stderr)

    try:
        result = process_rst_file(content, columns_to_remove, remove_empty_rows)
    except Exception as e:
        print(f"Error processing file: {e}", file=sys.stderr)
        sys.exit(1)

    # Without an output path the result goes to stdout as-is.
    if not args.output:
        print(result, end='')
        return

    try:
        with open(args.output, 'w', encoding='utf-8') as target:
            target.write(result)
    except IOError as e:
        print(f"Error writing output file: {e}", file=sys.stderr)
        sys.exit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()