Reimplement table column and row removal to be output format agnostic. Change-Id: I4822d53d37fd4604bf45c4bc4a315c8fc904376a Signed-off-by: Ron Stone <ronald.stone@windriver.com>
		
			
				
	
	
		
			204 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			204 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#!/usr/bin/env python3
 | 
						|
"""
 | 
						|
Script to remove empty rows from reStructuredText list tables.
Only list-table style rows (``* - cell``) found under ``.. list-table::`` or
``.. table::`` directives are examined; grid tables (with +---+ borders) are
passed through unchanged.
 | 
						|
"""
 | 
						|
 | 
						|
import re
 | 
						|
import argparse
 | 
						|
import sys
 | 
						|
from pathlib import Path
 | 
						|
 | 
						|
 | 
						|
def is_list_row_empty(row_lines):
    """
    Check if a list table row contains only empty cells.

    Only the bullet markers that open a row or cell (``* -`` or ``-`` at the
    start of a line, each followed by whitespace or the end of the line) are
    treated as separators. Hyphens that belong to the cell text, such as
    ``--`` or ``well-known``, count as content.

    Args:
        row_lines (list): Lines that make up the row

    Returns:
        bool: True if row contains only empty cells, False otherwise
    """
    # Leading indentation, an optional row marker "*", then one or more cell
    # markers. A "-" only counts as a marker when whitespace or the end of the
    # line follows it, so a run like "--" is left in place as cell text.
    leading_markers = re.compile(r'^\s*(?:\*\s*)?(?:-(?:\s+|$))+')

    # The row is empty when no line has anything left after its markers.
    return not any(
        leading_markers.sub('', line).strip() for line in row_lines
    )
 | 
						|
 | 
						|
 | 
						|
def clean_list_table(table_text):
    """
    Remove empty rows from a reStructuredText list table.

    A row starts at a ``* -`` line and collects the cell lines (``-``) and
    indented content lines that follow it. A blank line stays inside the row
    when the next non-blank line is indented deeper than the row's ``*``
    marker and is not itself a new row; otherwise the blank line ends the
    row. This keeps cell content that follows a blank line attached to its
    row, so such a row is not mistaken for an empty one. Lines that are not
    part of any row are copied through unchanged.

    Args:
        table_text (str): The complete list table text

    Returns:
        str: Cleaned table text with empty rows removed
    """
    row_start = re.compile(r'^\s*\*\s*-')
    cell_start = re.compile(r'^\s+-')
    indented_text = re.compile(r'^\s+\S')

    lines = table_text.split('\n')
    cleaned_lines = []

    # Lines of the row being collected; an empty list means "not in a row".
    current_row_lines = []
    # Column of the "*" that opened the current row.
    row_indent = 0

    def flush_row():
        # Emit the pending row unless all of its cells are empty, then reset.
        if current_row_lines and not is_list_row_empty(current_row_lines):
            cleaned_lines.extend(current_row_lines)
        current_row_lines.clear()

    def row_continues_after(index):
        # Look past any run of blank lines: the row goes on only if the next
        # non-blank line is nested under the row marker and is not a new row.
        for position in range(index + 1, len(lines)):
            following = lines[position]
            if not following.strip():
                continue
            if row_start.match(following):
                return False
            indent = len(following) - len(following.lstrip())
            return indent > row_indent
        return False

    for index, line in enumerate(lines):
        if row_start.match(line):
            # A new row always closes the previous one.
            flush_row()
            current_row_lines.append(line)
            row_indent = len(line) - len(line.lstrip())
        elif current_row_lines and cell_start.match(line):
            # Additional column of the current row.
            current_row_lines.append(line)
        elif current_row_lines and not line.strip():
            if row_continues_after(index):
                # Blank line inside a cell or between cells of the same row.
                current_row_lines.append(line)
            else:
                flush_row()
                cleaned_lines.append(line)
        elif current_row_lines and indented_text.match(line):
            # Multi-line cell content.
            current_row_lines.append(line)
        else:
            # Not in a row, or an unindented line ended the row.
            flush_row()
            cleaned_lines.append(line)

    # Handle the last row.
    flush_row()

    return '\n'.join(cleaned_lines)
 | 
						|
 | 
						|
 | 
						|
def process_content(content):
    """
    Find every table directive in the document and clean its rows.

    A table starts at a ``.. list-table::`` or ``.. table::`` line. The lines
    after it are taken as part of the table for as long as they are blank,
    start with three spaces or a tab, or look like a ``* -`` list row. Each
    collected table is passed through ``clean_list_table``; all other lines
    are copied unchanged.

    Args:
        content (str): Full document content

    Returns:
        str: Content with cleaned tables
    """
    directive_re = re.compile(r'^\s*\.\.\s+(list-table::|table::)')
    list_row_re = re.compile(r'^\s*\*\s*-')

    def belongs_to_table(text):
        # Blank lines, indented lines and list rows all stay with the table.
        if not text.strip():
            return True
        if text.startswith(('   ', '\t')):
            return True
        return list_row_re.match(text) is not None

    source = content.split('\n')
    total = len(source)
    output = []

    pos = 0
    while pos < total:
        head = source[pos]
        pos += 1

        if not directive_re.match(head):
            output.append(head)
            continue

        # Extend the table up to the first line that does not belong to it.
        end = pos
        while end < total and belongs_to_table(source[end]):
            end += 1

        table_text = '\n'.join([head] + source[pos:end])
        pos = end

        output.extend(clean_list_table(table_text).split('\n'))

    return '\n'.join(output)
 | 
						|
 | 
						|
 | 
						|
def main():
    """
    Command-line entry point: clean the tables of a single .rst file.

    Parses the arguments, reads the input file as UTF-8 and runs
    ``process_content`` on it. With ``--dry-run`` the result is only printed;
    otherwise it is written to ``--output``, or back over the input file when
    no output path is given. Read and write failures are reported on stderr
    and end the process with exit status 1.
    """
    cli = argparse.ArgumentParser(
        description='Remove empty rows from reStructuredText tables'
    )
    cli.add_argument('input_file', help='Input .rst file path')
    cli.add_argument(
        '-o', '--output',
        help='Output file path (default: overwrite input file)'
    )
    cli.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be changed without modifying files'
    )
    options = cli.parse_args()

    # Read input file
    try:
        source_path = Path(options.input_file)
        with open(source_path, 'r', encoding='utf-8') as handle:
            original = handle.read()
    except FileNotFoundError:
        print(f"Error: File '{options.input_file}' not found", file=sys.stderr)
        sys.exit(1)
    except Exception as exc:
        print(f"Error reading file: {exc}", file=sys.stderr)
        sys.exit(1)

    cleaned = process_content(original)

    # Dry run: report only, never touch the file system.
    if options.dry_run:
        print("=== DRY RUN MODE ===")
        if cleaned == original:
            print("No changes needed.")
        else:
            rule = "=" * 50
            print("Changes would be made:")
            print(rule)
            print(cleaned)
            print(rule)
        return

    # Without --output the input file is overwritten in place.
    target_path = Path(options.output) if options.output else source_path

    try:
        with open(target_path, 'w', encoding='utf-8') as handle:
            handle.write(cleaned)
        print(f"Successfully cleaned tables in '{target_path}'")
    except Exception as exc:
        print(f"Error writing file: {exc}", file=sys.stderr)
        sys.exit(1)
 | 
						|
 | 
						|
 | 
						|
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()
 |