
Reimplement table column and row removal to be output format agnostic. Change-Id: I4822d53d37fd4604bf45c4bc4a315c8fc904376a Signed-off-by: Ron Stone <ronald.stone@windriver.com>
204 lines
6.3 KiB
Python
204 lines
6.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Script to remove empty rows from reStructuredText list tables.

Rows are recognised by their ``* -`` list syntax inside ``.. list-table::`` and
``.. table::`` directives. Grid-table lines (with +---+ borders) are passed
through unchanged.
|
|
"""
|
|
|
|
import re
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
def is_list_row_empty(row_lines):
    """
    Check if a list table row contains only empty cells.

    Every line is examined on its own: a leading row marker (``* -``) or
    cell marker (``-``) is stripped, and the row counts as empty only when
    nothing but whitespace remains on every line. Hyphens that are not a
    leading marker are cell content, so a row whose cells hold placeholder
    text such as ``--`` is not considered empty.

    Args:
        row_lines (list): Lines that make up the row

    Returns:
        bool: True if row contains only empty cells, False otherwise
    """
    # Anchored at the start of the line so that only the structural marker
    # is removed; any hyphen further along the line belongs to the cell text.
    marker = re.compile(r'^\s*(?:\*\s*)?-\s*')
    return not any(marker.sub('', line).strip() for line in row_lines)
|
|
|
|
|
|
def clean_list_table(table_text):
    """
    Remove empty rows from a reStructuredText list table.

    The text is walked line by line. A line starting with ``* -`` opens a
    row; following ``-`` cell lines, indented content lines and interior
    blank lines are gathered into that row. When the row ends it is kept
    only if is_list_row_empty() reports that it has content. Lines that
    are not part of any row are copied through unchanged.

    A blank line does not end a row when the text after it still belongs
    to the row, i.e. when the next non-blank line is indented deeper than
    the row's ``*`` marker. This keeps multi-paragraph cells together so
    that a row is judged as a whole rather than in fragments.

    Args:
        table_text (str): The complete list table text

    Returns:
        str: Cleaned table text with empty rows removed
    """
    lines = table_text.split('\n')
    cleaned_lines = []

    current_row_lines = []
    row_indent = 0  # column of the '*' that opened the current row
    in_row = False

    for i, line in enumerate(lines):
        row_start = re.match(r'^(\s*)\*\s*-', line)
        if row_start:
            # A new row closes whatever row was being collected.
            _flush_list_row(current_row_lines, cleaned_lines)
            current_row_lines = [line]
            row_indent = len(row_start.group(1).expandtabs())
            in_row = True
        elif in_row and re.match(r'^\s+-', line):
            # Additional cell of the current row.
            current_row_lines.append(line)
        elif in_row and line.strip() == '':
            if _row_continues_after_blank(lines, i + 1, row_indent):
                # Blank line inside the row (between cells or paragraphs).
                current_row_lines.append(line)
            else:
                # Blank line after the row: it is emitted even when the row
                # itself is dropped.
                _flush_list_row(current_row_lines, cleaned_lines)
                current_row_lines = []
                cleaned_lines.append(line)
                in_row = False
        elif in_row and re.match(r'^\s+\S', line):
            # Multi-line cell content.
            current_row_lines.append(line)
        else:
            # Not in a row, or an unindented line ended the row.
            _flush_list_row(current_row_lines, cleaned_lines)
            current_row_lines = []
            cleaned_lines.append(line)
            in_row = False

    # Handle the last row.
    _flush_list_row(current_row_lines, cleaned_lines)

    return '\n'.join(cleaned_lines)


def _flush_list_row(row_lines, cleaned_lines):
    """Append a collected row to the output unless it is absent or empty."""
    if row_lines and not is_list_row_empty(row_lines):
        cleaned_lines.extend(row_lines)


def _row_continues_after_blank(lines, start, row_indent):
    """Tell whether the row goes on after a blank line; start is the next index."""
    # A cell marker right after the blank line always continues the row.
    if start < len(lines) and re.match(r'^\s*-', lines[start]):
        return True

    # Otherwise skip further blank lines and inspect the next real line.
    pos = start
    while pos < len(lines) and lines[pos].strip() == '':
        pos += 1
    if pos == len(lines):
        return False

    candidate = lines[pos]
    if re.match(r'^\s*\*\s*-', candidate):
        return False  # the next row begins here

    # Text indented deeper than the '*' marker is still inside the list item.
    expanded = candidate.expandtabs()
    return len(expanded) - len(expanded.lstrip()) > row_indent
|
|
|
|
|
|
def process_content(content):
    """
    Clean every table directive found in a document.

    The document is scanned line by line. A ``.. list-table::`` or
    ``.. table::`` directive line opens a table block, which then absorbs
    each following line that is blank, begins with a space or tab, or looks
    like a ``* -`` row. Every block is passed through clean_list_table();
    all other lines are copied to the result untouched.

    Args:
        content (str): Full document content

    Returns:
        str: Content with cleaned tables
    """
    source = content.split('\n')
    total = len(source)
    output = []
    pos = 0

    while pos < total:
        first = source[pos]
        pos += 1

        # Anything that is not a table directive is passed straight through.
        if not re.match(r'^\s*\.\.\s+(list-table::|table::)', first):
            output.append(first)
            continue

        # Gather the directive line plus everything that belongs to it.
        block = [first]
        while pos < total:
            candidate = source[pos]
            belongs_to_block = (
                not candidate.strip()
                or candidate.startswith((' ', '\t'))
                or re.match(r'^\s*\*\s*-', candidate)
            )
            if not belongs_to_block:
                break
            block.append(candidate)
            pos += 1

        cleaned = clean_list_table('\n'.join(block))
        output.extend(cleaned.split('\n'))

    return '\n'.join(output)
|
|
|
|
|
|
def main():
    """
    Command-line entry point.

    Reads the given input file as UTF-8, runs process_content() on it and
    then either reports the outcome (``--dry-run``) or writes the result to
    the ``--output`` path, falling back to overwriting the input file.
    Read and write failures are reported on stderr and end the process
    with exit status 1.
    """
    cli = argparse.ArgumentParser(
        description='Remove empty rows from reStructuredText tables'
    )
    cli.add_argument('input_file', help='Input .rst file path')
    cli.add_argument(
        '-o', '--output',
        help='Output file path (default: overwrite input file)'
    )
    cli.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be changed without modifying files'
    )
    options = cli.parse_args()

    # Load the document.
    try:
        input_path = Path(options.input_file)
        content = input_path.read_text(encoding='utf-8')
    except FileNotFoundError:
        print(f"Error: File '{options.input_file}' not found", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error reading file: {e}", file=sys.stderr)
        sys.exit(1)

    cleaned_content = process_content(content)

    # Dry run: report only, never touch the file system.
    if options.dry_run:
        print("=== DRY RUN MODE ===")
        if cleaned_content == content:
            print("No changes needed.")
        else:
            rule = "=" * 50
            print("Changes would be made:")
            print(rule)
            print(cleaned_content)
            print(rule)
        return

    # Real run: write to the requested target or back over the input.
    output_path = Path(options.output) if options.output else input_path
    try:
        output_path.write_text(cleaned_content, encoding='utf-8')
        print(f"Successfully cleaned tables in '{output_path}'")
    except Exception as e:
        print(f"Error writing file: {e}", file=sys.stderr)
        sys.exit(1)
|
|
|
|
|
|
# Run the command-line interface only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|