"""Remove a named column from ``list-table`` directives in .rst files.

A file opts in with two marker lines anywhere in its text:

``:remove-column-from-html-table: <header text>``
    Header text of the column to drop from every list-table in the file.
``:remove-column-emptied-row: 1``
    Also drop rows whose remaining cells are all empty.
"""
import os
import re
import sys

_REMOVE_COLUMN_KEY = ":remove-column-from-html-table:"
_REMOVE_EMPTIED_ROW_KEY = ":remove-column-emptied-row:"
_TABLE_DIRECTIVE = ".. list-table::"

# "<indent>* -": first cell of a row. Group 1 ends at the column of the "-".
_ROW_START_RE = re.compile(r'^(\s*\*\s+)-[ \t]*')
# "<indent>-": a later cell of a row. Group 1 is the indent before the "-".
_CELL_START_RE = re.compile(r'^(\s*)-(\s+|$)')


def parse_meta_directives(lines):
    """Collect the column-removal settings found in *lines*.

    Returns a dict that may contain ``"remove_column"`` (str) and
    ``"remove_emptied_row"`` (bool, true only for the literal value ``1``).
    When a key occurs more than once, the last occurrence wins.
    """
    directives = {}
    for line in lines:
        # Split on the key itself so text (or colons) before it cannot shift
        # the value into the wrong field.
        if _REMOVE_COLUMN_KEY in line:
            value = line.split(_REMOVE_COLUMN_KEY, 1)[1].strip()
            directives["remove_column"] = value
        if _REMOVE_EMPTIED_ROW_KEY in line:
            value = line.split(_REMOVE_EMPTIED_ROW_KEY, 1)[1].strip()
            directives["remove_emptied_row"] = value == "1"
    return directives


def extract_table_blocks(lines):
    """Return every list-table block in *lines* as a list of its lines.

    A block starts at a ``.. list-table::`` line and continues over blank
    lines and lines that begin with a space or tab. It ends at the first other
    line, at the next ``.. list-table::`` line, or at the end of input.
    Trailing blank lines are part of the block.
    """
    blocks = []
    current = []
    inside = False
    for line in lines:
        if line.strip().startswith(_TABLE_DIRECTIVE):
            if inside:
                # A new table starts before the previous one was closed.
                blocks.append(current)
            inside = True
            current = [line]
        elif inside and (line.strip() == "" or line[0] in " \t"):
            # Parenthesised on purpose: blank lines only belong to a block
            # while we are inside one.
            current.append(line)
        elif inside:
            blocks.append(current)
            inside = False
    if inside:
        blocks.append(current)
    return blocks


def split_table_row(row_lines):
    """Split a table row (beginning with '*') into a list of cells.

    Each cell is a list of text lines without line endings. The first entry is
    the text on the marker line; later entries are continuation lines, kept
    with their indentation relative to the cell text so nested content keeps
    its shape. A ``-`` line indented deeper than the row's cell markers is
    treated as cell content (a nested bullet), not as a new cell.
    """
    cells = []
    current = []
    marker_col = None   # column of the "-" markers in this row
    content_col = None  # column where the current cell's text starts
    for line in row_lines:
        first = _ROW_START_RE.match(line)
        if first:
            marker_col = len(first.group(1))
            text = line[first.end():].strip()
            content_col = first.end() if text else marker_col + 2
            current = [text]
            continue

        nxt = _CELL_START_RE.match(line)
        if nxt and (marker_col is None or len(nxt.group(1)) <= marker_col):
            cells.append(current)
            text = line[nxt.end():].strip()
            content_col = nxt.end() if text else len(nxt.group(1)) + 2
            current = [text]
            continue

        stripped = line.strip()
        if not stripped or content_col is None:
            current.append(stripped)
        else:
            lead = len(line) - len(line.lstrip())
            current.append(" " * max(lead - content_col, 0) + stripped)
    cells.append(current)
    return cells


def join_cells(cells, base_indent=" "):
    """Reconstruct the lines of a list-table row from cell lists.

    The first cell is written as ``<indent>* - text`` and later cells as
    ``<indent>  - text`` so all ``-`` markers share a column. Continuation
    lines are indented to the cell text column; blank ones are emitted as
    empty strings. Returned lines carry no line endings.
    """
    lines = []
    for index, cell in enumerate(cells):
        parts = cell or [""]
        marker = "* - " if index == 0 else "  - "
        # rstrip so an empty cell does not leave trailing whitespace.
        lines.append((base_indent + marker + parts[0]).rstrip())
        for extra in parts[1:]:
            lines.append((base_indent + "    " + extra) if extra else "")
    return lines


def process_table(table_lines, col_to_remove, remove_empty_row=False):
    """Return *table_lines* with the column headed *col_to_remove* removed.

    The first row is taken as the header. If the table has no rows or no
    header cell equal to *col_to_remove* (compared after collapsing
    whitespace), *table_lines* itself is returned. Otherwise a new list is
    returned: preamble lines and trailing blank lines are passed through as
    given, rewritten rows have no line endings.

    With *remove_empty_row* true, rows whose remaining cells are all empty are
    dropped. Rows too short to contain the column are kept verbatim. Rows left
    with no cells at all are dropped.
    """
    # Keep the table's trailing blank lines out of the last cell; otherwise
    # they disappear together with that cell when it is the removed column.
    end = len(table_lines)
    while end > 0 and table_lines[end - 1].strip() == "":
        end -= 1
    body, tail = table_lines[:end], table_lines[end:]

    preamble = []  # directive line, options, blank lines before the first row
    rows = []
    current = []
    for line in body:
        if _ROW_START_RE.match(line):
            if current:
                rows.append(current)
            current = [line]
        elif current:
            current.append(line)
        else:
            preamble.append(line)
    if current:
        rows.append(current)

    if not rows:
        return table_lines

    wanted = " ".join(col_to_remove.split())
    titles = [" ".join(" ".join(cell).split())
              for cell in split_table_row(rows[0])]
    if wanted not in titles:
        return table_lines
    col = titles.index(wanted)

    new_lines = []
    for row in rows:
        cells = split_table_row(row)
        if col >= len(cells):
            # The row never had this column; there is nothing to remove.
            new_lines.extend(row)
            continue
        remaining = cells[:col] + cells[col + 1:]
        if not remaining:
            continue
        if remove_empty_row and all(
                not "".join(cell).strip() for cell in remaining):
            continue
        # Re-emit the row at its own indentation.
        indent = row[0][:len(row[0]) - len(row[0].lstrip())]
        new_lines.extend(join_cells(remaining, indent))

    result = list(preamble)
    # The rows must be separated from the directive/options by a blank line.
    if not result or result[-1].strip() != "":
        result.append("")
    result.extend(new_lines)
    result.extend(tail)
    return result


def process_file(path):
    """Apply the column removal requested inside the file at *path*, in place.

    Does nothing when the file has no ``:remove-column-from-html-table:``
    marker. The file is only rewritten when its content actually changes.
    """
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    directives = parse_meta_directives(lines)
    if "remove_column" not in directives:
        return

    table_blocks = extract_table_blocks(lines)
    output_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        block = None
        if line.strip().startswith(_TABLE_DIRECTIVE):
            block = next(
                (b for b in table_blocks if lines[i:i + len(b)] == b), None)
        if block is None:
            # Ordinary line, or a directive with no matching block: copy it
            # and always advance so the loop cannot stall.
            output_lines.append(line)
            i += 1
            continue
        output_lines.extend(process_table(
            block,
            directives["remove_column"],
            directives.get("remove_emptied_row", False),
        ))
        i += len(block)

    rendered = [l if l.endswith("\n") else l + "\n" for l in output_lines]
    if rendered == lines:
        return
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(rendered)


def scan_dir(directory):
    """Run :func:`process_file` on every ``.rst`` file below *directory*."""
    for root, _, files in os.walk(directory):
        for name in files:
            if name.endswith(".rst"):
                process_file(os.path.join(root, name))


def main():
    """Command-line entry point: process one file or a directory tree."""
    if len(sys.argv) != 2:
        print("Usage: python remove-columns.py <file-or-directory>")
        sys.exit(1)
    target = sys.argv[1]
    if os.path.isdir(target):
        scan_dir(target)
    else:
        process_file(target)


if __name__ == "__main__":
    main()