Fix indentation logic in cell reconstruction Change-Id: I208c10538a7eb8144d521a75e554fc61e0111201 Signed-off-by: Ron Stone <ronald.stone@windriver.com>
		
			
				
	
	
		
			226 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			226 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import os
 | 
						|
import re
 | 
						|
import sys
 | 
						|
 | 
						|
def parse_meta_directives(lines):
    """Collect the column-removal directives found in the lines of a file.

    Three markers are recognised anywhere in a line:

    * ``:remove-column-from-html-table:`` -> ``"remove_column"`` (str)
    * ``:remove-column-emptied-row:``     -> ``"remove_emptied_row"`` (bool,
      True only when the value is exactly ``"1"``)
    * ``:docs-build-context:``            -> ``"context"`` (str)

    The value is the text following the marker on the same line, with
    surrounding whitespace removed.  When a marker occurs on several lines
    the last occurrence wins.

    Args:
        lines: iterable of text lines (with or without trailing newlines).

    Returns:
        dict containing only the keys whose markers were found.
    """
    markers = (
        (":remove-column-from-html-table:", "remove_column"),
        (":remove-column-emptied-row:", "remove_emptied_row"),
        (":docs-build-context:", "context"),
    )

    directives = {}
    for line in lines:
        for marker, key in markers:
            if marker not in line:
                continue
            # Split on the marker itself rather than on the first two colons,
            # so text containing ':' before the marker cannot shift the value.
            value = line.split(marker, 1)[1].strip()
            if key == "remove_emptied_row":
                directives[key] = value == "1"
            else:
                directives[key] = value

    return directives
 | 
						|
 | 
						|
def extract_table_blocks(lines):
    """Return every ``.. list-table::`` block as a list of contiguous lines.

    A block starts at a line whose stripped text begins with
    ``.. list-table::`` and continues over every following line that is
    blank or starts with at least two spaces.  The first line that is
    neither ends the block (and is not part of it).  Trailing blank lines
    that directly follow a table therefore belong to its block.

    Each returned block is an exact contiguous slice of ``lines``, so a
    caller can locate it again with ``lines[i:i + len(block)] == block``.

    Args:
        lines: list of text lines of the whole file.

    Returns:
        list of blocks, in file order; each block is a list of lines.
    """
    blocks = []
    current = None  # lines of the table being collected, or None outside a table

    for line in lines:
        if line.strip().startswith(".. list-table::"):
            # A new table begins; flush a table that was still open so it is
            # not silently dropped when two tables are only separated by
            # blank or indented lines.
            if current is not None:
                blocks.append(current)
            current = [line]
        elif current is not None:
            # Only lines seen while inside a table may extend it.  (Without
            # this grouping, blank lines elsewhere in the file were appended
            # to an already finished block.)
            if line.startswith("  ") or line.strip() == "":
                current.append(line)
            else:
                blocks.append(current)
                current = None

    if current is not None:
        blocks.append(current)
    return blocks
 | 
						|
 | 
						|
def split_table_row(row_lines):
    """Split the lines of one list-table row into cells.

    The row is expected to start with a ``* - `` line (first cell); every
    later line whose ``-`` marker sits in the same column as the ``-`` of
    that first line starts a new cell.  All other lines, including blank
    lines and deeper-indented ``- `` bullets that belong to a cell's own
    content, are continuation lines of the current cell.

    Args:
        row_lines: lines of a single row, normally ending in newlines.

    Returns:
        list of cells.  Each cell is a list of strings: the first entry is
        the text after the cell marker (marker and the whitespace following
        it removed), the remaining entries are the continuation lines kept
        exactly as given, including their leading whitespace.
    """
    cells = []
    current_cell = []
    marker_col = None  # column of the '-' in the row's "* -" line

    for line in row_lines:
        first = re.match(r'^(\s*\*\s+)-\s*', line)
        if first:
            # First cell of the row: remember where its '-' sits so that
            # sibling cell markers can be told apart from nested bullets.
            marker_col = len(first.group(1))
            current_cell = [line[first.end():]]
            continue

        following = re.match(r'^(\s*)-\s+', line)
        is_cell_marker = following is not None and (
            # Without a preceding "* -" line there is nothing to align
            # against, so any "- " line is taken as a cell marker.
            marker_col is None or len(following.group(1)) == marker_col
        )
        if is_cell_marker:
            cells.append(current_cell)
            current_cell = [line[following.end():]]
        else:
            # Continuation line: keep exactly as-is.
            current_cell.append(line)

    cells.append(current_cell)
    return cells
 | 
						|
 | 
						|
def join_cells(cells, base_indent):
    """Rebuild the text lines of a list-table row from its cells.

    The first cell is emitted as ``<base_indent>* - <text>`` and every other
    cell as ``<base_indent>  - <text>``.  Continuation lines are aligned to
    the cell's content column (``len(base_indent) + 4``); indentation that a
    continuation line originally had beyond that column is preserved, which
    keeps the deeper indentation used by directive option lines.  Lines
    indented less than the content column are moved up to it.  Blank
    continuation lines are emitted as empty strings.

    Args:
        cells: list of cells as produced by ``split_table_row``; each cell
            is a list whose first entry is the first-line text and whose
            remaining entries are raw continuation lines.
        base_indent: whitespace that precedes the row's ``*`` marker.

    Returns:
        list of output lines without trailing newlines.  An empty ``cells``
        list yields an empty list.
    """
    if not cells:
        # Every column of the row was removed; there is nothing to emit.
        return []

    # In a list-table, content starts 4 characters after the list marker for
    # both the first cell ("* - ") and other cells ("  - ").
    content_col_len = len(base_indent) + 4

    def count_leading_spaces(s: str) -> int:
        """Return the indentation width of ``s``, counting a tab as 4."""
        n = 0
        for ch in s:
            if ch == ' ':
                n += 1
            elif ch == '\t':
                # tabs aren't expected in RST, but treat as 4 spaces if present
                n += 4
            else:
                break
        return n

    out = []
    for index, cell in enumerate(cells):
        marker = "* - " if index == 0 else "  - "
        first_line_text = cell[0].rstrip('\n') if cell else ""
        out.append(f"{base_indent}{marker}{first_line_text.rstrip()}")

        for line in cell[1:]:
            if line.strip() == "":
                out.append("")  # preserve blank lines
                continue
            s = line.rstrip('\n')
            lead = count_leading_spaces(s)
            # extra indent beyond the baseline content column
            extra = max(0, lead - content_col_len)
            # Strip tabs together with spaces: they were already converted
            # to width by count_leading_spaces and must not be counted twice.
            out.append(" " * (content_col_len + extra) + s.lstrip(' \t'))

    return out
 | 
						|
 | 
						|
 | 
						|
 | 
						|
def process_table(table_lines, cols_to_remove_str, remove_empty_row=False):
    """Remove named columns from one ``list-table`` block.

    The block is split into the lines that precede the first row (directive
    line, options, blank lines) and the rows themselves.  The first row is
    taken as the header: a column is removed when the stripped text of its
    header cell equals one of the requested names.

    Args:
        table_lines: lines of one table block, as returned by
            ``extract_table_blocks``.
        cols_to_remove_str: comma-separated header names of the columns to
            remove.  Surrounding whitespace is ignored, as are empty names
            (for example from a trailing comma).
        remove_empty_row: when true, rows whose remaining cells are all
            empty after the removal are dropped.

    Returns:
        ``table_lines`` unchanged when the table has no rows or none of the
        names matches a header cell.  Otherwise a new list: the non-row
        lines, one empty string, then the rebuilt rows.  Rebuilt row lines
        come from ``join_cells`` and carry no trailing newline, while the
        non-row lines are passed through as received.
    """
    # Parse comma-separated column names; skip empty entries so that a stray
    # comma cannot match (and delete) a column with an empty header cell.
    cols_to_remove = [col.strip() for col in cols_to_remove_str.split(',') if col.strip()]

    row_start = re.compile(r'(\s*)\*\s+-')

    processed = []   # lines that are not part of any row
    table_rows = []  # one list of lines per row
    buffer = []      # lines of the row currently being collected

    for line in table_lines:
        if row_start.match(line):
            if buffer:
                table_rows.append(buffer)
            buffer = [line]
        elif buffer and (line.strip() == "" or re.match(r'^\s*(-|[^*].*)$', line)):
            buffer.append(line)
        else:
            if buffer:
                table_rows.append(buffer)
                buffer = []
            processed.append(line)

    if buffer:
        table_rows.append(buffer)

    if not table_rows:
        return table_lines  # Don't modify

    # Locate the requested columns in the header row.
    header_cells = split_table_row(table_rows[0])
    flat_cells = [' '.join(c).strip() for c in header_cells]
    # A set keeps a name listed twice from deleting two columns; descending
    # order lets us delete from right to left without shifting indices.
    header_indices = sorted(
        {flat_cells.index(name) for name in cols_to_remove if name in flat_cells},
        reverse=True,
    )
    if not header_indices:
        return table_lines  # Don't modify

    # Remove the columns from each row
    new_rows = []
    for row in table_rows:
        cells = split_table_row(row)

        for header_index in header_indices:
            if header_index < len(cells):
                del cells[header_index]

        if not cells:
            # Every cell of this row was removed; a row without cells
            # cannot be written back.
            continue

        # Check if row should be removed (if it's empty after column removal)
        if remove_empty_row and all(not ''.join(cell).strip() for cell in cells):
            continue

        # Rebuild each row with its own indentation rather than whatever
        # the last row of the table happened to use.
        indentation = row_start.match(row[0]).group(1)
        new_rows.append(join_cells(cells, indentation))

    return processed + [""] + [line for row in new_rows for line in row]
 | 
						|
 | 
						|
def process_file(path):
    """Apply the column-removal directives of one file and rewrite it in place.

    The file is left untouched when it has no
    ``:remove-column-from-html-table:`` directive, or when its
    ``:docs-build-context:`` value differs from the ``DOCS_BUILD_CONTEXT``
    environment variable (a message is printed in that case).  Otherwise
    every ``list-table`` block is passed through ``process_table`` and the
    result is written back, with a newline appended to any output line that
    lacks one.

    Args:
        path: path of the UTF-8 text file to process.

    Raises:
        KeyError: if the file has a remove-column directive but no
            ``:docs-build-context:`` directive.
            NOTE(review): confirm whether such files should be skipped or
            processed instead of failing.
    """
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    directives = parse_meta_directives(lines)
    if "remove_column" not in directives:
        return
    if directives["context"] != os.environ.get('DOCS_BUILD_CONTEXT'):
        print("Not in", directives["context"], "- Skipping")
        return

    table_blocks = extract_table_blocks(lines)
    output_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        matched = None
        if line.strip().startswith(".. list-table::"):
            # Find the extracted block that starts at this position.
            matched = next(
                (block for block in table_blocks if lines[i:i + len(block)] == block),
                None,
            )

        if matched is None:
            # Ordinary line, or a table line for which no block was found:
            # copy it through and advance, so the loop always terminates.
            output_lines.append(line)
            i += 1
            continue

        output_lines.extend(
            process_table(
                matched,
                directives["remove_column"],
                directives.get("remove_emptied_row", False),
            )
        )
        i += len(matched)

    with open(path, 'w', encoding='utf-8') as f:
        # Rebuilt table rows come back without line endings; normalise here.
        f.writelines(
            text + ("\n" if not text.endswith("\n") else "") for text in output_lines
        )
 | 
						|
 | 
						|
# Currently unused: the list of files comes from grep and the script is
# invoked once per file.
def scan_dir(directory):
    """Run ``process_file`` on every ``.rst`` file found under ``directory``.

    The tree is traversed with ``os.walk``; files whose name ends in
    ``.rst`` are processed in the order they are listed.
    """
    for dirpath, _subdirs, filenames in os.walk(directory):
        rst_paths = (
            os.path.join(dirpath, filename)
            for filename in filenames
            if filename.endswith(".rst")
        )
        for rst_path in rst_paths:
            process_file(rst_path)
 | 
						|
 | 
						|
if __name__ == "__main__":
    # Exactly one argument is expected: the path of the RST file to rewrite.
    # It is handed to process_file, which opens it as a single file, so the
    # usage text names a file rather than a directory.
    if len(sys.argv) != 2:
        print("Usage: python remove-columns.py <file.rst>")
        sys.exit(1)
    process_file(sys.argv[1])
 |