Reimplement table column and row removal to be output format agnostic. Change-Id: I4822d53d37fd4604bf45c4bc4a315c8fc904376a Signed-off-by: Ron Stone <ronald.stone@windriver.com>
		
			
				
	
	
		
			477 lines
		
	
	
		
			17 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			477 lines
		
	
	
		
			17 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#!/usr/bin/env python3
 | 
						|
"""
 | 
						|
Script to remove columns from reStructuredText grid tables based on column names.
 | 
						|
Only processes grid tables that are declared using the .. table:: directive.
 | 
						|
Column names to remove are specified using field lists in the RST file itself.
 | 
						|
 | 
						|
Usage:
 | 
						|
  python remove_rst_columns.py input.rst -o output.rst
 | 
						|
  python remove_rst_columns.py input.rst --output output.rst
 | 
						|
 | 
						|
If no output file is specified, the result is printed to stdout.
 | 
						|
 | 
						|
The RST file should contain field list entries specifying columns to remove:
 | 
						|
  :remove-column-from-html-table: Column1, Column2, Column3
 | 
						|
 | 
						|
Note: Only grid tables declared with the .. table:: directive will be modified.
 | 
						|
Standalone grid tables without the directive will be left unchanged.
 | 
						|
"""
 | 
						|
 | 
						|
import argparse
 | 
						|
import sys
 | 
						|
import re
 | 
						|
import os
 | 
						|
from typing import List, Tuple, Optional
 | 
						|
 | 
						|
 | 
						|
def parse_grid_table(lines: List[str], start_idx: int) -> Tuple[List[List[List[str]]], List[int], int]:
    """
    Read one reStructuredText grid table out of ``lines``.

    Scanning begins at ``start_idx`` and moves forward to the first line whose
    stripped text starts with '+'. The table is the contiguous run of lines
    that start with '+' (borders) or '|' (cell content) from that point on.
    Column boundaries are taken from the '+' characters of the first border
    line only.

    Returns a 3-tuple:
        - table_data: one entry per row; each row is a list of cells and each
          cell is a list of strings, one per physical line of the row
          (right-stripped, left padding kept)
        - col_widths: number of characters between neighbouring '+' corners
        - end_idx: index of the first line after the table

    When no border line is found the result is ``([], [], start_idx)``; when
    the first border has fewer than two '+' characters it is
    ``([], [], end_idx)``.
    """
    total = len(lines)

    # Advance to the first border-looking line.
    pos = start_idx
    while pos < total and not lines[pos].strip().startswith('+'):
        pos += 1
    if pos >= total:
        return [], [], start_idx

    # Consume the contiguous block of border and data lines.
    first = pos
    while pos < total and lines[pos].strip().startswith(('+', '|')):
        pos += 1
    block = lines[first:pos]

    # Corner offsets are measured on the raw (unstripped) first border so the
    # same offsets can be used to slice the raw data lines below.
    corners = [offset for offset, char in enumerate(block[0]) if char == '+']
    if len(corners) < 2:
        return [], [], pos

    spans = list(zip(corners, corners[1:]))
    col_widths = [right - left - 1 for left, right in spans]

    table_data = []
    pending = None  # row currently being accumulated, if any
    for raw in block:
        marker = raw.strip()[:1]
        if marker == '+':
            # Any border closes the row collected so far.
            if pending is not None:
                table_data.append(pending)
                pending = None
        elif marker == '|':
            if pending is None:
                pending = [[] for _ in spans]
            # Keep leading whitespace (indentation inside the cell) and drop
            # only the right-hand padding.
            for cell, (left, right) in zip(pending, spans):
                cell.append(raw[left + 1:right].rstrip())

    # A table whose last line is a data line still yields that final row.
    if pending is not None:
        table_data.append(pending)

    return table_data, col_widths, pos
 | 
						|
 | 
						|
 | 
						|
def find_column_removal_directives(lines: List[str]) -> List[str]:
    """
    Collect the column names listed in ``:remove-column-from-html-table:`` entries.

    Every line whose stripped text starts with that field name contributes its
    comma-separated values, e.g.
    ``:remove-column-from-html-table: Column1, Column2``.

    Returns the names in the order they appear, with surrounding whitespace
    removed and empty items dropped.
    """
    field_name = ':remove-column-from-html-table:'
    names = []

    for raw in lines:
        text = raw.strip()
        if not text.startswith(field_name):
            continue
        # Everything after the field name is a comma-separated list.
        for piece in text[len(field_name):].split(','):
            piece = piece.strip()
            if piece:
                names.append(piece)

    return names
 | 
						|
 | 
						|
 | 
						|
def check_docs_build_context(lines: List[str]) -> None:
    """
    Compare the file's ``:docs-build-context:`` value with ``DOCS_BUILD_CONTEXT``.

    The first ``:docs-build-context:`` entry that carries a non-empty value is
    used. If no such entry exists the function returns silently. If the value
    differs from the ``DOCS_BUILD_CONTEXT`` environment variable, a message is
    written to stderr and the process exits with status 0; otherwise a
    confirmation is written to stderr.
    """
    field_name = ':docs-build-context:'

    # Lazily yield the value of every matching entry, then pick the first
    # non-empty one.
    candidates = (
        raw.strip()[len(field_name):].strip()
        for raw in lines
        if raw.strip().startswith(field_name)
    )
    docs_context = next((value for value in candidates if value), None)

    if docs_context is None:
        return

    env_context = os.environ.get('DOCS_BUILD_CONTEXT')
    if env_context != docs_context:
        print(f"Docs-build-context '{docs_context}' does not match DOCS_BUILD_CONTEXT environment variable '{env_context}'. Skipping.", file=sys.stderr)
        sys.exit(0)
    print(f"docs-build-context '{docs_context}' matches environment variable", file=sys.stderr)
 | 
						|
 | 
						|
 | 
						|
def should_remove_emptied_rows(lines: List[str]) -> bool:
    """
    Report whether the ``:remove-column-emptied-row:`` entry is set to ``1``.

    Only the first line whose stripped text starts with the field name is
    consulted; later entries are ignored. Returns False when no entry exists.
    """
    field_name = ':remove-column-emptied-row:'
    first_entry = next(
        (raw.strip() for raw in lines if raw.strip().startswith(field_name)),
        None,
    )
    return first_entry is not None and first_entry[len(field_name):].strip() == '1'
 | 
						|
 | 
						|
 | 
						|
def find_first_table_position(lines: List[str]) -> int:
    """
    Locate the first grid table that ``find_table_directive`` accepts.

    A candidate is any line whose stripped text starts with '+' and which
    contains a '-'. Returns the index of the first accepted candidate, or
    ``len(lines)`` when there is none.
    """
    for idx, raw in enumerate(lines):
        looks_like_border = '-' in raw and raw.strip().startswith('+')
        if not looks_like_border:
            continue
        # Only the first element (the is-table flag) matters here.
        if find_table_directive(lines, idx)[0]:
            return idx
    return len(lines)
 | 
						|
 | 
						|
 | 
						|
def find_column_indices(headers: List[List[str]], columns_to_remove: List[str]) -> List[int]:
    """
    Map column names to the indices of the matching header cells.

    Args:
        headers: the header row; each cell is a list of text lines.
        columns_to_remove: names to look up.

    Each header cell is reduced to a single string with all runs of whitespace
    (including the line breaks of a wrapped header) collapsed to one space, and
    the requested names are normalised the same way. This lets a header that is
    wrapped over several lines be matched by a single-line name. An exact match
    is preferred; otherwise the first case-insensitive match is used. Names
    that match nothing produce a warning on stderr.

    Returns:
        The unique matching indices in descending order, so that columns can be
        popped one after another without shifting the remaining indices.
    """
    def _normalize(text: str) -> str:
        # str.split() with no argument splits on any whitespace run.
        return ' '.join(text.split())

    header_names = [_normalize(' '.join(cell)) for cell in headers]
    folded_names = [name.lower() for name in header_names]

    found = set()
    for col_name in columns_to_remove:
        wanted = _normalize(col_name)
        if wanted in header_names:
            found.add(header_names.index(wanted))
        elif wanted.lower() in folded_names:
            found.add(folded_names.index(wanted.lower()))
        else:
            print(f"Warning: Column '{col_name}' not found in table headers", file=sys.stderr)

    return sorted(found, reverse=True)
 | 
						|
 | 
						|
 | 
						|
def remove_columns_and_empty_rows(table_data: List[List[List[str]]], col_indices: List[int], remove_empty_rows: bool = False) -> List[List[List[str]]]:
    """
    Return a copy of ``table_data`` without the given columns.

    Args:
        table_data: rows of cells, each cell being a list of text lines.
        col_indices: indices of the columns to drop, in any order; duplicates
            are ignored and indices beyond a row's length are skipped.
        remove_empty_rows: when True, rows whose remaining cells contain only
            whitespace are left out of the result.

    Returns:
        A new list of new row lists. The input rows are not modified; the cell
        lists themselves are shared with the input.
    """
    # Popping from the highest index down keeps the lower indices valid, and
    # de-duplicating prevents one requested column from removing two.
    drop_order = sorted(set(col_indices), reverse=True)

    new_table = []
    for row in table_data:
        new_row = list(row)
        for idx in drop_order:
            if idx < len(new_row):
                new_row.pop(idx)

        # A row counts as empty when no remaining cell has visible text.
        if remove_empty_rows and not any('\n'.join(cell).strip() for cell in new_row):
            continue

        new_table.append(new_row)
    return new_table
 | 
						|
 | 
						|
 | 
						|
def calculate_column_widths(table_data: List[List[List[str]]]) -> List[int]:
    """
    Compute, per column, the length of the longest right-stripped cell line.

    The number of columns is taken from the first row; cells of later rows
    that lie beyond that count are ignored. An empty table yields ``[]``.
    """
    if not table_data:
        return []

    col_widths = [0] * len(table_data[0])

    for row in table_data:
        # zip() stops at the shorter sequence, which skips surplus cells.
        for idx, cell in zip(range(len(col_widths)), row):
            widest = max((len(text.rstrip()) for text in cell), default=0)
            if widest > col_widths[idx]:
                col_widths[idx] = widest

    return col_widths
 | 
						|
 | 
						|
 | 
						|
def generate_border_line(col_widths: List[int], indent: str = '') -> str:
    """
    Build a horizontal grid-table border such as ``+---+-----+``.

    Each column contributes ``width`` dashes followed by a '+'; the whole line
    is prefixed with ``indent``.
    """
    segments = ''.join('-' * width + '+' for width in col_widths)
    return f"{indent}+{segments}"
 | 
						|
 | 
						|
 | 
						|
def generate_data_lines(row: List[List[str]], col_widths: List[int], indent: str = '') -> List[str]:
    """
    Render one table row as its physical ``|...|`` lines.

    The row is as tall as its tallest cell; shorter cells are filled with
    blanks. Every cell line is right-stripped and then left-justified to the
    column's width. Cells without a corresponding entry in ``col_widths`` are
    not rendered. Each line is prefixed with ``indent``.
    """
    height = max((len(cell) for cell in row), default=0)

    # Pair each cell with its width once; zip() drops cells that have no width.
    columns = list(zip(row, col_widths))

    rendered = []
    for line_no in range(height):
        segments = []
        for cell, width in columns:
            text = cell[line_no].rstrip() if line_no < len(cell) else ''
            segments.append(text.ljust(width) + '|')
        rendered.append(indent + '|' + ''.join(segments))

    return rendered
 | 
						|
 | 
						|
 | 
						|
def rebuild_table(table_data: List[List[List[str]]], indent: str = '') -> List[str]:
    """
    Turn parsed table data back into grid-table text lines.

    Column widths are recomputed from the data. The output starts with a
    border line and every row is followed by the same border line, so all
    separators in the result look alike. An empty table yields ``[]``.
    """
    if not table_data:
        return []

    widths = calculate_column_widths(table_data)
    separator = generate_border_line(widths, indent)

    output = [separator]
    for row in table_data:
        output += generate_data_lines(row, widths, indent)
        output.append(separator)

    return output
 | 
						|
 | 
						|
 | 
						|
def find_table_directive(lines: List[str], start_idx: int) -> Tuple[bool, int, Optional[str], str]:
    """
    Check whether the grid table at ``start_idx`` is introduced by ``.. table::``.

    Looking backwards from the table and skipping blank lines, the nearest
    non-blank line must be either the ``.. table::`` line itself or a
    ``:header-rows`` / ``:widths`` option line. In the option case the search
    continues upwards over consecutive lines starting with ':' to find the
    ``.. table::`` line, so the title and start index come from the directive
    rather than from the option text.

    Returns:
        - is_table_directive: True if the table is accepted
        - directive_start: index of the ``.. table::`` line; if an option line
          was found with no ``.. table::`` line above it, the index of that
          option line; -1 when not accepted
        - table_title: text after ``.. table::``, or None if there is none
        - table_indent: leading spaces/tabs of the table's first line
          ('' when not accepted)
    """
    directive = '.. table::'

    def _title_of(stripped_directive: str) -> Optional[str]:
        # Text following the directive marker; None when nothing follows.
        return stripped_directive[len(directive):].strip() or None

    # Leading whitespace of the table's first border line.
    border = lines[start_idx]
    table_indent = border[:len(border) - len(border.lstrip(' \t'))]

    # Nearest non-blank line above the table.
    i = start_idx - 1
    while i >= 0 and lines[i].strip() == '':
        i -= 1
    if i < 0:
        return False, -1, None, ''

    stripped_line = lines[i].strip()

    if stripped_line.startswith(directive):
        return True, i, _title_of(stripped_line), table_indent

    if stripped_line.startswith((':header-rows', ':widths')):
        # Walk up through the option block to the directive line that owns it.
        j = i - 1
        while j >= 0 and lines[j].strip().startswith(':'):
            j -= 1
        if j >= 0 and lines[j].strip().startswith(directive):
            return True, j, _title_of(lines[j].strip()), table_indent
        # An option line alone is still accepted, but it carries no title.
        return True, i, None, table_indent

    return False, -1, None, ''
 | 
						|
 | 
						|
 | 
						|
def _count_header_rows(table_lines: List[str]) -> int:
    """Return how many rows precede the first '+=' separator line (0 if there is none)."""
    rows_seen = 0
    inside_row = False
    for raw in table_lines:
        text = raw.strip()
        if text.startswith('+'):
            if inside_row:
                rows_seen += 1
                inside_row = False
            if text.startswith('+='):
                return rows_seen
        elif text.startswith('|'):
            inside_row = True
    return 0


def process_rst_file(content: str, columns_to_remove: List[str], remove_empty_rows: bool = False) -> str:
    """
    Remove the named columns from every directive-declared grid table in ``content``.

    Args:
        content: full text of the RST document.
        columns_to_remove: header names of the columns to drop.
        remove_empty_rows: also drop rows that contain no text once the
            columns are gone.

    A line is treated as a possible table start when its stripped text starts
    with '+' and it contains '-'. Only tables accepted by
    ``find_table_directive`` are touched; the first row of a table is taken as
    its header when matching names. Tables in which none of the names match,
    and all other lines, are copied through unchanged. If the source table has
    a '=' header separator, it is re-created after the header rows of the
    rebuilt table. A summary of each modified table is written to stderr.

    Returns:
        The processed document text.
    """
    lines = content.split('\n')
    result_lines = []
    i = 0

    while i < len(lines):
        line = lines[i]

        # Anything that does not look like a top border is copied verbatim.
        if not (line.strip().startswith('+') and '-' in line):
            result_lines.append(line)
            i += 1
            continue

        is_directive_table, _, table_title, table_indent = find_table_directive(lines, i)
        if not is_directive_table:
            # Grid table not declared with a directive: leave it alone.
            result_lines.append(line)
            i += 1
            continue

        table_data, _, end_idx = parse_grid_table(lines, i)
        if not table_data:
            # Not a parsable grid table: keep the line and move on.
            result_lines.append(line)
            i += 1
            continue

        # Every line before index i has already been copied to result_lines by
        # the branches of this loop, so the table can be emitted directly.
        headers = table_data[0]
        col_indices = find_column_indices(headers, columns_to_remove)

        if not col_indices:
            # Nothing to remove here: keep the original table text.
            result_lines.extend(lines[i:end_idx])
            i = end_idx
            continue

        # Process header and body rows separately so the position of the
        # header separator is still known after empty rows are dropped.
        header_count = _count_header_rows(lines[i:end_idx])
        kept_header = remove_columns_and_empty_rows(table_data[:header_count], col_indices, remove_empty_rows)
        kept_body = remove_columns_and_empty_rows(table_data[header_count:], col_indices, remove_empty_rows)
        new_table_lines = rebuild_table(kept_header + kept_body, table_indent)

        if kept_header and kept_body:
            # rebuild_table emits one border, then each row's lines followed by
            # a border; the border closing the last header row therefore sits
            # at the summed (row height + 1) of the header rows.
            separator_idx = sum(
                max((len(cell) for cell in row), default=0) + 1
                for row in kept_header
            )
            if separator_idx < len(new_table_lines):
                new_table_lines[separator_idx] = new_table_lines[separator_idx].replace('-', '=')

        result_lines.extend(new_table_lines)

        title_info = f" ('{table_title}')" if table_title else ""
        removed_cols_info = ['\n'.join(headers[idx]).strip() for idx in sorted(col_indices)]
        empty_rows_info = " and empty rows" if remove_empty_rows else ""
        print(f"Processed table{title_info}: removed columns {removed_cols_info}{empty_rows_info}", file=sys.stderr)

        i = end_idx

    return '\n'.join(result_lines)
 | 
						|
 | 
						|
 | 
						|
def main():
    """
    Command-line entry point.

    Reads the RST input (a file path, or stdin when the argument is "-"),
    applies the build-context check, gathers the removal settings from the
    document itself, processes the tables and writes the result to the
    ``--output`` file or to stdout. Read, processing and write errors are
    reported on stderr and end the process with status 1.
    """
    usage_epilog = """
Examples:
  %(prog)s input.rst -o output.rst
  %(prog)s input.rst --output output.rst
  cat input.rst | %(prog)s - > output.rst

The RST file should contain field list entries specifying columns to remove:
  :remove-column-from-html-table: Column1, Column2, Column3

Note: Only processes grid tables declared with the .. table:: directive.
Standalone grid tables will be left unchanged.
        """

    parser = argparse.ArgumentParser(
        description='Remove columns from reStructuredText grid tables declared with .. table:: directive',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_epilog,
    )
    parser.add_argument('input', help='Input RST file (use "-" for stdin)')
    parser.add_argument('-o', '--output', help='Output file (default: stdout)')
    args = parser.parse_args()

    # --- Read -------------------------------------------------------------
    reading_stdin = args.input == '-'
    if reading_stdin:
        content = sys.stdin.read()
    else:
        try:
            with open(args.input, 'r', encoding='utf-8') as source:
                content = source.read()
        except IOError as e:
            print(f"Error reading input file: {e}", file=sys.stderr)
            sys.exit(1)

    # --- Inspect the document's own settings --------------------------------
    lines = content.split('\n')

    # May terminate the process (status 0) when the build context differs.
    check_docs_build_context(lines)

    columns_to_remove = find_column_removal_directives(lines)
    remove_empty_rows = should_remove_emptied_rows(lines)

    if not columns_to_remove:
        for message in (
            "No column removal directives found in the file.",
            "Looking for: :remove-column-from-html-table: Column1, Column2",
        ):
            print(message, file=sys.stderr)

    if remove_empty_rows:
        print("Will remove rows that become empty after column removal.", file=sys.stderr)

    # --- Transform ----------------------------------------------------------
    try:
        result = process_rst_file(content, columns_to_remove, remove_empty_rows)
    except Exception as e:
        print(f"Error processing file: {e}", file=sys.stderr)
        sys.exit(1)

    # --- Write --------------------------------------------------------------
    if not args.output:
        print(result, end='')
        return

    try:
        with open(args.output, 'w', encoding='utf-8') as target:
            target.write(result)
    except IOError as e:
        print(f"Error writing output file: {e}", file=sys.stderr)
        sys.exit(1)
 | 
						|
 | 
						|
 | 
						|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
 |