docs/remove_empty-list_rows.py

#!/usr/bin/env python3
"""
Script to remove empty rows from reStructuredText grid tables and list tables.
Supports both grid tables (with +---+ borders) and simple list tables.
"""

import re
import argparse
import sys
from pathlib import Path


def is_list_row_empty(row_lines):
    """
    Check if a list table row contains only empty cells.

    The row counts as empty when nothing is left after discarding the leading
    ``* -`` row marker and every stand-alone ``-`` cell marker. Hyphens that
    are attached to other characters (``foo-bar``, ``--``, ``-1``) are cell
    content, so a row holding such text is NOT empty.

    Args:
        row_lines (list): Lines that make up the row

    Returns:
        bool: True if row contains only empty cells, False otherwise
    """
    # Flatten the row so markers and content can be inspected in one pass.
    content = ' '.join(row_lines)
    # Drop the row marker ("* -"), but only when its hyphen is a marker of
    # its own and not the first character of content such as "--".
    content = re.sub(r'^\s*\*\s*-(?=\s|$)', '', content)
    # Drop the remaining cell markers: hyphens standing alone between
    # whitespace. Hyphens embedded in text are kept so that placeholder
    # content like "--" is not mistaken for an empty cell.
    content = re.sub(r'(?<!\S)-(?!\S)', ' ', content)
    # Anything other than whitespace left over is real cell content.
    return not content.strip()


def clean_list_table(table_text):
    """
    Remove empty rows from a reStructuredText list table.

    A row starts at a line of the form ``* - ...`` and extends over the
    following lines that are indented deeper than its ``*`` bullet (further
    ``- cell`` markers and multi-line cell content). Blank lines belong to
    the row only when more of the same row follows them, however many blank
    lines there are; blank lines trailing a row are left in place. Lines
    that are not part of any row are copied through unchanged.

    Args:
        table_text (str): The complete list table text

    Returns:
        str: Cleaned table text with empty rows removed
    """
    row_start = re.compile(r'^\s*\*\s*-')

    def indent_width(text):
        # Expand tabs first so tab- and space-indented lines compare sensibly.
        expanded = text.expandtabs()
        return len(expanded) - len(expanded.lstrip())

    def continues_row(text, bullet_indent):
        # Row content is any non-blank line nested under the row's bullet
        # that does not itself open a new row.
        return (
            text.strip() != ''
            and not row_start.match(text)
            and indent_width(text) > bullet_indent
        )

    lines = table_text.split('\n')
    total = len(lines)
    cleaned_lines = []

    index = 0
    while index < total:
        line = lines[index]
        if not row_start.match(line):
            # Directive line, option, blank separator or unrelated text.
            cleaned_lines.append(line)
            index += 1
            continue

        # Gather every line that belongs to the row opened here.
        bullet_indent = indent_width(line)
        row_lines = [line]
        cursor = index + 1
        while cursor < total:
            candidate = lines[cursor]
            if continues_row(candidate, bullet_indent):
                row_lines.append(candidate)
                cursor += 1
                continue
            if candidate.strip() != '':
                # Next row or a dedented line: the row is complete.
                break
            # Blank line(s): they are part of the row only if row content
            # resumes afterwards. Deciding emptiness before looking past
            # them would drop the "* -" line of a row whose content starts
            # after a blank line.
            resume = cursor
            while resume < total and lines[resume].strip() == '':
                resume += 1
            if resume < total and continues_row(lines[resume], bullet_indent):
                row_lines.extend(lines[cursor:resume])
                cursor = resume
            else:
                break

        if not is_list_row_empty(row_lines):
            cleaned_lines.extend(row_lines)
        # Trailing blank lines (if any) are handled by the outer loop.
        index = cursor

    return '\n'.join(cleaned_lines)


def process_content(content):
    """
    Find every table directive in the document and clean it.

    A table block begins at a ``.. list-table::`` or ``.. table::`` line and
    runs over the following lines that are blank, start with three spaces or
    a tab, or look like a list row (``* - ...``). Each block is handed to
    ``clean_list_table``; all other lines are copied through untouched.

    Args:
        content (str): Full document content

    Returns:
        str: Content with cleaned tables
    """
    directive_re = re.compile(r'^\s*\.\.\s+(list-table::|table::)')
    row_re = re.compile(r'^\s*\*\s*-')

    source = content.split('\n')
    total = len(source)
    output = []

    pos = 0
    while pos < total:
        if not directive_re.match(source[pos]):
            # Ordinary line outside any table directive.
            output.append(source[pos])
            pos += 1
            continue

        # Advance `end` past every line that still belongs to the directive.
        end = pos + 1
        while end < total:
            candidate = source[end]
            belongs = (
                not candidate.strip()
                or candidate.startswith(('   ', '\t'))
                or row_re.match(candidate)
            )
            if not belongs:
                break
            end += 1

        # Clean the collected block and splice the result back in.
        block_text = '\n'.join(source[pos:end])
        output.extend(clean_list_table(block_text).split('\n'))
        pos = end

    return '\n'.join(output)


def main():
    """
    Command-line entry point.

    Parses the arguments, reads the input file, cleans its tables and then
    either previews the result (``--dry-run``) or writes it to the output
    path, which is the input file itself unless ``-o/--output`` is given.
    Exits with status 1 when the input cannot be read or the output cannot
    be written.
    """
    cli = argparse.ArgumentParser(
        description='Remove empty rows from reStructuredText tables'
    )
    cli.add_argument('input_file', help='Input .rst file path')
    cli.add_argument(
        '-o', '--output',
        help='Output file path (default: overwrite input file)'
    )
    cli.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be changed without modifying files'
    )
    options = cli.parse_args()

    # Load the document to process.
    try:
        source_path = Path(options.input_file)
        original = source_path.read_text(encoding='utf-8')
    except FileNotFoundError:
        print(f"Error: File '{options.input_file}' not found", file=sys.stderr)
        sys.exit(1)
    except Exception as exc:
        print(f"Error reading file: {exc}", file=sys.stderr)
        sys.exit(1)

    cleaned = process_content(original)

    # Preview mode: report the outcome and leave every file untouched.
    if options.dry_run:
        print("=== DRY RUN MODE ===")
        if cleaned == original:
            print("No changes needed.")
            return
        divider = "=" * 50
        print("Changes would be made:")
        print(divider)
        print(cleaned)
        print(divider)
        return

    # Write mode: fall back to overwriting the input when no -o was given.
    target_path = source_path if not options.output else Path(options.output)
    try:
        target_path.write_text(cleaned, encoding='utf-8')
        print(f"Successfully cleaned tables in '{target_path}'")
    except Exception as exc:
        print(f"Error writing file: {exc}", file=sys.stderr)
        sys.exit(1)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()