"""Remove named columns from reStructuredText ``list-table`` blocks in place.

A file opts in by containing these marker lines:

* ``:remove-column-from-html-table: <Header A>, <Header B>`` - comma-separated
  header texts of the columns to drop.
* ``:remove-column-emptied-row: 1`` - also drop rows that are empty once the
  columns are gone.
* ``:docs-build-context: <name>`` - the file is only rewritten when this value
  equals the ``DOCS_BUILD_CONTEXT`` environment variable.
"""
import os
import re
import sys

_REMOVE_COLUMN_MARKER = ":remove-column-from-html-table:"
_REMOVE_EMPTIED_ROW_MARKER = ":remove-column-emptied-row:"
_BUILD_CONTEXT_MARKER = ":docs-build-context:"
_TABLE_DIRECTIVE = ".. list-table::"

# First line of a row ("<indent>* - text"); group 1 is the row indentation and
# the match ends where the cell text begins.
_FIRST_CELL_RE = re.compile(r'^(\s*)\*\s+-\s*')
# First line of any later cell in the row ("<indent>  - text").
_NEXT_CELL_RE = re.compile(r'^\s*-\s+')
# Any line that may continue the current row once a row has been started.
_ROW_CONTINUATION_RE = re.compile(r'^\s*(-|[^*].*)$')


def _directive_value(line, marker):
    """Return the stripped text that follows *marker* on *line*."""
    return line.split(marker, 1)[1].strip()


def parse_meta_directives(lines):
    """Collect the column-removal directives found anywhere in *lines*.

    Args:
        lines: The lines of the file.

    Returns:
        A dict that may contain ``"remove_column"`` (str, the raw
        comma-separated column list), ``"remove_emptied_row"`` (bool, true only
        when the value is exactly ``1``) and ``"context"`` (str). Keys are
        present only for directives that occur; a later occurrence overrides an
        earlier one.
    """
    directives = {}
    for line in lines:
        # Independent checks: each marker is looked for on every line.
        if _REMOVE_COLUMN_MARKER in line:
            directives["remove_column"] = _directive_value(
                line, _REMOVE_COLUMN_MARKER)
        if _REMOVE_EMPTIED_ROW_MARKER in line:
            directives["remove_emptied_row"] = (
                _directive_value(line, _REMOVE_EMPTIED_ROW_MARKER) == "1")
        if _BUILD_CONTEXT_MARKER in line:
            directives["context"] = _directive_value(
                line, _BUILD_CONTEXT_MARKER)
    return directives


def extract_table_blocks(lines):
    """Return every ``list-table`` block in *lines* as a list of its lines.

    A block starts at a line whose stripped text begins with
    ``.. list-table::`` and extends over the following lines that are blank or
    start with a space. The first other line ends the block.

    Args:
        lines: The lines of the file.

    Returns:
        A list of blocks; each block is a contiguous slice of *lines*.
    """
    blocks = []
    current = []
    inside = False
    for line in lines:
        if line.strip().startswith(_TABLE_DIRECTIVE):
            if inside:
                # A new table begins before the previous one was closed;
                # keep the previous block instead of discarding it.
                blocks.append(current)
            inside = True
            current = [line]
        elif inside and (line.startswith(" ") or line.strip() == ""):
            # The parentheses matter: blank lines outside a table must not be
            # appended to a block that has already been stored.
            current.append(line)
        elif inside:
            blocks.append(current)
            inside = False
    if inside:
        blocks.append(current)
    return blocks


def split_table_row(row_lines):
    """Split the lines of one row (starting with ``*``) into cells.

    Args:
        row_lines: The raw lines of a single list-table row.

    Returns:
        A list of cells. Each cell is a list of lines: the first entry is the
        text after the ``* - `` / ``- `` marker, the remaining entries are the
        continuation lines exactly as they appeared (leading spaces included).
    """
    cells = []
    current_cell = []
    for line in row_lines:
        first = _FIRST_CELL_RE.match(line)
        if first:
            # First cell in the row: keep only the text after "* - ".
            current_cell = [line[first.end():]]
            continue
        following = _NEXT_CELL_RE.match(line)
        if following:
            # NOTE: any line of the form "<spaces>- text" starts a new cell,
            # so a nested bullet list inside a cell is split into cells too.
            cells.append(current_cell)
            current_cell = [line[following.end():]]
        else:
            # Continuation line: keep exactly as-is.
            current_cell.append(line)
    cells.append(current_cell)
    return cells


def _count_leading_spaces(s):
    """Return the leading indentation width of *s*; a tab counts as 4."""
    width = 0
    for ch in s:
        if ch == ' ':
            width += 1
        elif ch == '\t':
            # Tabs aren't expected in RST, but treat as 4 spaces if present.
            width += 4
        else:
            break
    return width


def join_cells(cells, base_indent):
    """Rebuild the lines of a list-table row from its cells.

    Continuation lines are re-aligned to the content column (the column where
    the text of a cell's first line starts); indentation beyond that column is
    preserved, which keeps the deeper indentation of directive option lines.

    Args:
        cells: Cells as produced by ``split_table_row``.
        base_indent: The whitespace that precedes the row's ``*`` marker.

    Returns:
        The row's lines without trailing newlines; ``[]`` when *cells* is
        empty.
    """
    if not cells:
        return []

    # Content starts 4 characters after the indent for both the first cell
    # ("* - ") and the other cells ("  - "), so both markers are 4 wide.
    content_col_len = len(base_indent) + 4

    out = []
    for index, cell in enumerate(cells):
        marker = "* - " if index == 0 else "  - "
        first_line_text = cell[0].rstrip('\n') if cell else ""
        out.append(f"{base_indent}{marker}{first_line_text.rstrip()}")
        for line in cell[1:]:
            if line.strip() == "":
                out.append("")  # preserve blank lines
                continue
            s = line.rstrip('\n')
            # Extra indent beyond the baseline content column.
            extra = max(0, _count_leading_spaces(s) - content_col_len)
            out.append(" " * (content_col_len + extra) + s.lstrip(' '))
    return out


def _split_trailing_blanks(row_lines):
    """Split *row_lines* into (content lines, trailing blank lines)."""
    end = len(row_lines)
    while end > 0 and row_lines[end - 1].strip() == "":
        end -= 1
    return row_lines[:end], row_lines[end:]


def process_table(table_lines, cols_to_remove_str, remove_empty_row=False):
    """Remove the named columns from one list-table block.

    Args:
        table_lines: The lines of the block, directive line included.
        cols_to_remove_str: Comma-separated header texts of the columns to
            remove. Names are matched against the cells of the first row.
        remove_empty_row: When true, rows whose remaining cells are all empty
            are dropped.

    Returns:
        *table_lines* itself when none of the names matches a header cell.
        Otherwise a new list: the non-row lines, one empty string, then the
        rebuilt rows. Rebuilt lines carry no trailing newline.
    """
    # Parse comma-separated column names; ignore empty entries such as the one
    # produced by a trailing comma.
    cols_to_remove = [
        col.strip() for col in cols_to_remove_str.split(',') if col.strip()
    ]

    processed = []
    table_rows = []
    buffer = []
    indentation = ""

    for line in table_lines:
        row_start = _FIRST_CELL_RE.match(line)
        if row_start:
            indentation = row_start.group(1)
            if buffer:
                table_rows.append(buffer)
            buffer = [line]
        elif buffer and (line.strip() == ""
                         or _ROW_CONTINUATION_RE.match(line)):
            buffer.append(line)
        else:
            if buffer:
                table_rows.append(buffer)
                buffer = []
            processed.append(line)
    if buffer:
        table_rows.append(buffer)

    if not table_rows:
        return table_lines  # Don't modify

    # The first row is the header: find the index of every column to remove.
    header_cells = split_table_row(table_rows[0])
    flat_cells = [' '.join(cell).strip() for cell in header_cells]
    # A set, so a name listed twice cannot delete a second, unrelated column.
    found = {flat_cells.index(name)
             for name in cols_to_remove if name in flat_cells}
    if not found:
        return table_lines  # Don't modify

    # Delete from right to left so the remaining indices stay valid.
    header_indices = sorted(found, reverse=True)

    new_lines = []
    for row in table_rows:
        # Blank lines after a row belong to the row, not to its last cell;
        # set them aside so removing the last column does not drop them.
        body, trailing_blanks = _split_trailing_blanks(row)
        cells = split_table_row(body)
        for header_index in header_indices:
            if header_index < len(cells):
                del cells[header_index]

        # Check if row should be removed (empty after column removal).
        if remove_empty_row and all(
                not ''.join(cell).strip() for cell in cells):
            continue

        new_lines.extend(join_cells(cells, indentation))
        new_lines.extend("" for _ in trailing_blanks)

    return processed + [""] + new_lines


def process_file(path):
    """Rewrite the file at *path* with the requested table columns removed.

    Nothing is written when the file has no ``:remove-column-from-html-table:``
    directive, has no ``:docs-build-context:`` directive, or names a context
    different from the ``DOCS_BUILD_CONTEXT`` environment variable.

    Args:
        path: Path of the UTF-8 encoded file to process.
    """
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    directives = parse_meta_directives(lines)
    if "remove_column" not in directives:
        return

    context = directives.get("context")
    if context is None:
        # Without a declared context there is nothing to compare against the
        # environment, so leave the file untouched.
        print("No :docs-build-context: directive found - Skipping")
        return
    if context != os.environ.get('DOCS_BUILD_CONTEXT'):
        print("Not in", context, "- Skipping")
        return

    table_blocks = extract_table_blocks(lines)
    output_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        if line.strip().startswith(_TABLE_DIRECTIVE):
            # Find the table block that starts here and replace it.
            block = next(
                (b for b in table_blocks if lines[i:i + len(b)] == b), None)
            if block is not None:
                output_lines.extend(process_table(
                    block,
                    directives["remove_column"],
                    directives.get("remove_emptied_row", False),
                ))
                i += len(block)
                continue
        # Ordinary line, or a table line with no matching block: copy it and
        # always advance so the loop cannot stall.
        output_lines.append(line)
        i += 1

    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(
            l if l.endswith("\n") else l + "\n" for l in output_lines)


# Not used currently. We get a list of files from grep and loop.
def scan_dir(directory):
    """Run ``process_file`` on every ``.rst`` file below *directory*."""
    for root, _, files in os.walk(directory):
        for name in files:
            if name.endswith(".rst"):
                process_file(os.path.join(root, name))


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python remove-columns.py <path-to-rst-file>")
        sys.exit(1)
    process_file(sys.argv[1])