Files
docs/remove-list-columns.py
Ron Stone ab6eace956 Fix indentation
Fix indentation logic in cell reconstruction

Change-Id: I208c10538a7eb8144d521a75e554fc61e0111201
Signed-off-by: Ron Stone <ronald.stone@windriver.com>
2025-08-13 15:58:30 +00:00

226 lines
7.5 KiB
Python

import os
import re
import sys
def parse_meta_directives(lines):
    """Collect the column-removal settings declared in a file's lines.

    Each line is checked for three field markers. When a marker is present,
    the text after the line's second colon (stripped) is taken as its value.

    Returns a dict that may contain:
      * ``"remove_column"``      - raw value of ``:remove-column-from-html-table:``
      * ``"remove_emptied_row"`` - True when ``:remove-column-emptied-row:`` is "1"
      * ``"context"``            - raw value of ``:docs-build-context:``

    Keys are only present for markers that were found; a later occurrence
    overwrites an earlier one.
    """
    # (marker text, result key, value is a "1"/other flag)
    markers = (
        (":remove-column-from-html-table:", "remove_column", False),
        (":remove-column-emptied-row:", "remove_emptied_row", True),
        (":docs-build-context:", "context", False),
    )
    found = {}
    for text in lines:
        for marker, key, is_flag in markers:
            if marker not in text:
                continue
            # The marker contributes two colons, so index 2 always exists.
            value = text.split(":", 2)[2].strip()
            found[key] = (value == "1") if is_flag else value
    return found
def extract_table_blocks(lines):
    """Return every ``.. list-table::`` block found in *lines*.

    A block starts at a line whose stripped text begins with
    ``.. list-table::`` and continues over blank lines and lines indented
    more deeply (in spaces) than the directive line itself. The first
    non-blank line at or left of the directive's indent ends the block.
    Blank lines directly following a table therefore belong to its block.

    Args:
        lines: the file's lines, as returned by ``readlines()``.

    Returns:
        A list of blocks, each a list of the original line strings in order.
    """
    def indent_of(text):
        return len(text) - len(text.lstrip(" "))

    blocks = []
    current = []
    inside = False
    directive_indent = 0
    for line in lines:
        if line.strip().startswith(".. list-table::"):
            # A new table directly after another one: keep the earlier block
            # instead of silently overwriting it.
            if inside:
                blocks.append(current)
            inside = True
            directive_indent = indent_of(line)
            current = [line]
        elif not inside:
            # Lines outside any table must never touch `current`: it may
            # still be the list object already stored in `blocks`.
            continue
        elif line.strip() == "" or indent_of(line) > directive_indent:
            current.append(line)
        else:
            blocks.append(current)
            inside = False
    if inside:
        blocks.append(current)
    return blocks
def split_table_row(row_lines):
    """Split one list-table row into cells, preserving original line spacing.

    Args:
        row_lines: the lines of a single row; the first cell is expected to
            start with ``* - ``.

    Returns:
        A list of cells. Each cell is a list of strings: the first entry is
        the text following the cell marker, the remaining entries are the
        continuation lines exactly as given (leading spaces included).

    A ``- `` line only opens a new cell when its dash sits in the same
    column as the dash of the row's ``* - `` marker. More deeply indented
    dashes are bullet items inside the current cell and stay there. If no
    ``* - `` line has been seen yet, every ``- `` line opens a cell.
    """
    first_cell_re = re.compile(r'^(\s*\*\s+)-\s*')
    next_cell_re = re.compile(r'^(\s*)-\s+')

    cells = []
    current_cell = []
    marker_col = None  # column of the '-' in the row's "* - " marker
    for line in row_lines:
        first = first_cell_re.match(line)
        if first:
            # First cell in row: keep only the text after "* - ".
            marker_col = len(first.group(1).expandtabs())
            current_cell = [line[first.end():]]
            continue
        following = next_cell_re.match(line)
        if following and marker_col in (None, len(following.group(1).expandtabs())):
            # New cell: finish the previous one, keep the text after "- ".
            cells.append(current_cell)
            current_cell = [line[following.end():]]
        else:
            # Continuation line (including nested bullets): keep as-is.
            current_cell.append(line)
    cells.append(current_cell)
    return cells
def join_cells(cells, base_indent):
    """Reconstruct the lines of a list-table row from its cells.

    Args:
        cells: list of cells, each a list of strings (first line of text,
            then continuation lines), as produced by ``split_table_row``.
        base_indent: the whitespace that precedes the row's ``*`` marker.

    Returns:
        A list of output lines without trailing newlines. The first cell is
        introduced by ``* - `` and every other cell by ``  - ``, so content
        always starts ``len(base_indent) + 4`` columns in. Continuation lines
        are aligned to that content column; indentation beyond it is kept,
        which preserves nested content such as directive option lines.
        Blank continuation lines become empty strings. An empty *cells* list
        yields an empty list.
    """
    if not cells:
        return []

    content_col_len = len(base_indent) + 4
    content_pad = " " * content_col_len

    def render(marker, cell):
        first_line = cell[0] if cell else ""
        rendered = [f"{base_indent}{marker}{first_line.rstrip()}"]
        for line in cell[1:]:
            if line.strip() == "":
                rendered.append("")  # preserve blank lines
                continue
            s = line.rstrip('\n')
            # Strip spaces AND tabs: tabs are counted as indentation below,
            # so leaving them in the text would indent the line twice.
            body = s.lstrip(' \t')
            prefix = s[:len(s) - len(body)]
            # Tabs aren't expected in RST, but count each as 4 spaces.
            lead = prefix.count(' ') + 4 * prefix.count('\t')
            extra = max(0, lead - content_col_len)
            rendered.append(content_pad + " " * extra + body)
        return rendered

    out = render("* - ", cells[0])
    for cell in cells[1:]:
        # Two leading spaces keep this dash under the first cell's dash.
        out.extend(render("  - ", cell))
    return out
def process_table(table_lines, cols_to_remove_str, remove_empty_row=False):
    """Remove the named columns from one list-table block.

    Args:
        table_lines: the lines of a single ``.. list-table::`` block.
        cols_to_remove_str: comma-separated header names of the columns to
            drop. Names are compared with the first row's cells after
            joining each cell's lines with spaces and stripping.
        remove_empty_row: when true, rows whose remaining cells are all
            blank are dropped.

    Returns:
        *table_lines* unchanged when the block has no rows or none of the
        names matches a header cell. Otherwise a new list: the non-row lines
        of the block, one empty string, then the rebuilt rows. Rebuilt row
        lines carry no trailing newline.
    """
    cols_to_remove = [col.strip() for col in cols_to_remove_str.split(',')]

    row_start_re = re.compile(r'(\s*)\*\s+-')
    continuation_re = re.compile(r'^\s*(-|[^*].*)$')

    # Separate row lines (grouped per row) from everything else in the block.
    processed = []
    table_rows = []
    buffer = []
    indentation = ""
    for line in table_lines:
        start = row_start_re.match(line)
        if start:
            indentation = start.group(1)
            if buffer:
                table_rows.append(buffer)
            buffer = [line]
        elif buffer and (line.strip() == "" or continuation_re.match(line)):
            buffer.append(line)
        else:
            if buffer:
                table_rows.append(buffer)
                buffer = []
            processed.append(line)
    if buffer:
        table_rows.append(buffer)

    if not table_rows:
        return table_lines

    # The first row is the header; look up each requested name in it.
    header_cells = [' '.join(c).strip() for c in split_table_row(table_rows[0])]
    # A set, because a name listed twice must not delete two columns.
    # Descending order lets us delete right to left without shifting indices.
    header_indices = sorted(
        {header_cells.index(name) for name in cols_to_remove if name in header_cells},
        reverse=True,
    )
    if not header_indices:
        return table_lines  # Don't modify

    output = processed + [""]
    for row in table_rows:
        # Blank lines that trail a row end up inside its last cell. Set them
        # aside so they survive removal of that cell (or of the whole row);
        # otherwise the table can lose the blank line that separates it from
        # the text that follows.
        body = list(row)
        trailing_blanks = 0
        while len(body) > 1 and body[-1].strip() == "":
            body.pop()
            trailing_blanks += 1

        cells = split_table_row(body)
        for header_index in header_indices:
            if header_index < len(cells):
                del cells[header_index]

        emptied = all(not ''.join(cell).strip() for cell in cells)
        # A row with no cells left cannot be rendered at all.
        if cells and not (remove_empty_row and emptied):
            output.extend(join_cells(cells, indentation))
        output.extend([""] * trailing_blanks)
    return output
def process_file(path):
    """Rewrite *path* in place with the requested list-table columns removed.

    The file is left untouched when it has no
    ``:remove-column-from-html-table:`` directive, when it has no
    ``:docs-build-context:`` directive, or when that context differs from
    the ``DOCS_BUILD_CONTEXT`` environment variable. Otherwise every
    ``.. list-table::`` block is passed through ``process_table`` and the
    file is written back with each line newline-terminated.

    Args:
        path: path of the RST file to process (read and written as UTF-8).
    """
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    directives = parse_meta_directives(lines)
    if "remove_column" not in directives:
        return

    context = directives.get("context")
    if context is None:
        # Without a declared context there is nothing to compare against;
        # skip rather than fail with a KeyError. The file stays unmodified.
        print("No :docs-build-context: in", path, "- Skipping")
        return
    if context != os.environ.get('DOCS_BUILD_CONTEXT'):
        print("Not in", context, "- Skipping")
        return

    table_blocks = extract_table_blocks(lines)
    output_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        block = None
        if line.strip().startswith(".. list-table::"):
            # Find the extracted block that starts at this line.
            block = next(
                (b for b in table_blocks if lines[i:i + len(b)] == b),
                None,
            )
        if block is None:
            # Ordinary line, or a table with no matching block: copy it
            # through so the loop always advances.
            output_lines.append(line)
            i += 1
            continue
        output_lines.extend(
            process_table(
                block,
                directives["remove_column"],
                directives.get("remove_emptied_row", False),
            )
        )
        i += len(block)

    with open(path, 'w', encoding='utf-8') as f:
        # Rebuilt table lines have no newline; original lines already do.
        f.writelines(l if l.endswith("\n") else l + "\n" for l in output_lines)
# not used currently. We get a list of files from grep and loop
def scan_dir(directory):
    """Run ``process_file`` on every ``.rst`` file found under *directory*.

    The tree is walked with ``os.walk``; files whose names do not end in
    ``.rst`` are ignored.
    """
    for folder, _subdirs, filenames in os.walk(directory):
        rst_names = (entry for entry in filenames if entry.endswith(".rst"))
        for rst_name in rst_names:
            process_file(os.path.join(folder, rst_name))
if __name__ == "__main__":
    # Exactly one argument is expected: the path of a single RST file, which
    # is handed to process_file and rewritten in place.
    if len(sys.argv) != 2:
        print("Usage: python remove-list-columns.py <file.rst>")
        sys.exit(1)
    process_file(sys.argv[1])