Notebook and Module Parsing

Parse notebook metadata and content, and extract function/class signatures with docments

Data Models for Parsing


FunctionInfo


def FunctionInfo(
    name:str, signature:str, docstring:Optional[str]=None, decorators:List[str]=<factory>, is_exported:bool=False,
    is_async:bool=False, source_line:Optional[int]=None
)->None:

Information about a function


VariableInfo


def VariableInfo(
    name:str, value:Optional[str]=None, type_hint:Optional[str]=None, comment:Optional[str]=None,
    is_exported:bool=False
)->None:

Information about a module-level variable


ClassInfo


def ClassInfo(
    name:str, signature:str, docstring:Optional[str]=None, methods:List[FunctionInfo]=<factory>,
    decorators:List[str]=<factory>, attributes:List[VariableInfo]=<factory>, is_exported:bool=False,
    source_line:Optional[int]=None
)->None:

Information about a class


ModuleInfo


def ModuleInfo(
    path:Path, name:str, title:Optional[str]=None, description:Optional[str]=None,
    functions:List[FunctionInfo]=<factory>, classes:List[ClassInfo]=<factory>,
    variables:List[VariableInfo]=<factory>, imports:List[str]=<factory>
)->None:

Information about a module (notebook or Python file)

AST Parsing Utilities


extract_docments_signature


def extract_docments_signature(
    node:Union[ast.FunctionDef, ast.AsyncFunctionDef], # AST function node
    source_lines:List[str], # Source code lines
)->str: # Function signature

Extract function signature with docments-style comments


parse_function


def parse_function(
    node:Union[ast.FunctionDef, ast.AsyncFunctionDef], # AST function node
    source_lines:List[str], # Source code lines
    is_exported:bool=False, # Has #| export
)->FunctionInfo: # Function information

Parse a function definition from AST


parse_class


def parse_class(
    node:ast.ClassDef, # AST class node
    source_lines:List[str], # Source code lines
    is_exported:bool=False, # Has #| export
)->ClassInfo: # Class information

Parse a class definition from AST


parse_variable


def parse_variable(
    node:Union[ast.Assign, ast.AnnAssign], # AST assignment node
    source_lines:List[str], # Source code lines
    is_exported:bool=False, # Has #| export
)->List[VariableInfo]: # Variable information

Parse variable assignments from AST

Notebook Cell Parsing


parse_code_cell


def parse_code_cell(
    cell:Dict[str, Any], # Notebook code cell
)->Tuple[List[FunctionInfo], List[ClassInfo], List[VariableInfo], List[str]]: # (functions, classes, variables, imports)

Parse a notebook code cell for functions, classes, variables, and imports

Module Parsing


parse_notebook


def parse_notebook(
    path:Path, # Path to notebook
)->ModuleInfo: # Module information

Parse a notebook file for module information


parse_python_file


def parse_python_file(
    path:Path, # Path to Python file
)->ModuleInfo: # Module information

Parse a Python file for module information

Testing

Let’s test the parser on our own notebooks:

# Parse the core notebook and print a summary of what the parser found
core_info = parse_notebook(Path("core.ipynb"))

# Module-level metadata, one "Label: value" line each
for label, value in (
    ("Module", core_info.name),
    ("Title", core_info.title),
    ("Description", core_info.description),
):
    print(f"{label}: {value}")

# Total count per kind, followed by a short preview of the names
preview_count = 3
for heading, items in (
    ("Functions", core_info.functions),
    ("Classes", core_info.classes),
):
    print(f"\n{heading} ({len(items)}):")
    for item in items[:preview_count]:
        print(f"  - {item.name}")

# Full per-class breakdown produced by parse_class
print("\nTesting refactored parse_class function...")
print("Class signatures:")
for klass in core_info.classes:
    method_names = [method.name for method in klass.methods]
    attribute_names = [attribute.name for attribute in klass.attributes]
    print(f"\n{klass.name}:")
    print(f"  Decorators: {klass.decorators}")
    print(f"  Methods: {method_names}")
    print(f"  Attributes: {attribute_names}")
    print(f"  Signature: {klass.signature[:100]}...")  # truncate to the first 100 chars
Module: core
Title: Core Utilities
Description: Core utilities and data models for nbdev project overview generation

Functions (4):
  - get_notebook_files
  - get_subdirectories
  - read_notebook

Classes (2):
  - NotebookInfo
  - DirectoryInfo

Testing refactored parse_class function...
Class signatures:

NotebookInfo:
  Decorators: ['dataclass']
  Methods: ['relative_path']
  Attributes: ['path', 'name', 'title', 'description', 'export_module']
  Signature: class NotebookInfo:...

DirectoryInfo:
  Decorators: ['dataclass']
  Methods: ['total_notebook_count']
  Attributes: ['path', 'name', 'notebook_count', 'description', 'subdirs', 'notebooks']
  Signature: class DirectoryInfo:...
# Print the docments-style signatures recovered for the first two functions
print("Function signatures with docments:")
for parsed in core_info.functions[:2]:
    # Name header (preceded by a blank line), then the signature on the next line
    print(f"\n{parsed.name}:", parsed.signature, sep="\n")
Function signatures with docments:

get_notebook_files:
def get_notebook_files(path: Path = None,           # Directory to search (defaults to nbs_path)
                      recursive: bool = True        # Search subdirectories
                      ) -> List[Path]:              # List of notebook paths

get_subdirectories:
def get_subdirectories(path: Path = None,           # Directory to search (defaults to nbs_path)
                      recursive: bool = False       # Include all nested subdirectories
                      ) -> List[Path]:              # List of directory paths