#!/usr/bin/env python3
"""
Convert dfasm.sublime-syntax (Sublime Text) to dfasm.tmLanguage.json (TextMate).

This script reads the Sublime Text syntax grammar and produces a
TextMate-compatible JSON grammar.

Key transformations:
- Inlines {{variable}} references (including variables that reference
  other variables)
- Flattens qualified reference contexts into single patterns with captures
- Degrades port-specific scopes to generic numeric scopes (TextMate limitation)
- Converts context stacks to TextMate patterns with repositories

Usage:
    python editor/scripts/sublime_to_textmate.py
    python editor/scripts/sublime_to_textmate.py INPUT.sublime-syntax OUTPUT.tmLanguage.json
"""

import json
import re
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

try:
    import yaml
except ImportError:
    # PyYAML is only needed to read the input file; keep the pure conversion
    # helpers importable without it and fail with a clear message on load.
    yaml = None


# Matches a Sublime ``{{variable}}`` reference; group 1 is the variable name.
_VARIABLE_REF = re.compile(r'\{\{(\w+)\}\}')


def load_sublime_syntax(path: str) -> Dict[str, Any]:
    """Load a YAML sublime-syntax file and return its top-level mapping.

    Args:
        path: Path of the ``.sublime-syntax`` file to read.

    Returns:
        The parsed YAML document.

    Raises:
        ImportError: PyYAML is not installed.
        ValueError: The document is empty or its top level is not a mapping.
    """
    if yaml is None:
        raise ImportError(
            "PyYAML is required to read .sublime-syntax files "
            "(install it with 'pip install pyyaml')"
        )
    with open(path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)
    if not isinstance(data, dict):
        raise ValueError(f"{path}: expected a YAML mapping at the top level")
    return data


def inline_variables(text: str, variables: Dict[str, str]) -> str:
    """Inline {{variable}} references with variable values.

    Uses non-capturing groups (?:...) to avoid creating extra capture groups
    that break capture numbering in TextMate patterns.

    Variable values may themselves contain ``{{other}}`` references; these are
    expanded recursively. Unknown names, and references that would form a
    cycle, are left in the text untouched.

    Args:
        text: Regex source possibly containing ``{{name}}`` references.
        variables: Mapping of variable name to its regex source.

    Returns:
        ``text`` with every resolvable reference replaced.
    """
    def expand(fragment: str, active: frozenset) -> str:
        def replace_var(match):
            var_name = match.group(1)
            # ``active`` holds the variables currently being expanded; seeing
            # one again means a cycle, so stop instead of recursing forever.
            if var_name not in variables or var_name in active:
                return match.group(0)
            inner = expand(str(variables[var_name]), active | {var_name})
            return f"(?:{inner})"

        return _VARIABLE_REF.sub(replace_var, fragment)

    return expand(text, frozenset())


def build_string_pattern(quote_char: str, prefix: str = '', escaped: bool = True) -> Dict[str, Any]:
    """
    Build a begin/end pattern for strings/char literals.

    Args:
        quote_char: '"' or "'"
        prefix: 'r' (raw), 'b' (byte), or '' (normal)
        escaped: Whether to include escape patterns

    Returns:
        Pattern dict with begin/end structure
    """
    if quote_char == '"':
        if prefix == 'r':
            scope_base = 'string.quoted.double.raw.dfasm'
        elif prefix == 'b':
            scope_base = 'string.quoted.double.byte.dfasm'
        else:
            scope_base = 'string.quoted.double.dfasm'
    else:
        # Any other quote character is treated as a single-quoted literal.
        scope_base = 'string.quoted.single.dfasm'

    pattern = {
        'begin': re.escape(f"{prefix}{quote_char}"),
        'beginCaptures': {
            '0': {'name': scope_base}
        },
        'end': re.escape(quote_char),
        'endCaptures': {
            '0': {'name': scope_base}
        },
        'name': scope_base,
    }

    if escaped:
        # Escape sequences recognised inside the literal.
        pattern['patterns'] = [
            {'match': r'\\[ntr0\\\\\'\"]', 'name': 'constant.character.escape.dfasm'},
            {'match': r'\\x[0-9a-fA-F]{2}', 'name': 'constant.character.escape.dfasm'},
        ]

    return pattern


def build_func_body_pattern() -> Dict[str, Any]:
    """Build a begin/end pattern for function bodies.

    The body is delimited by ``{`` and ``}`` and re-applies the whole grammar
    (``$self``) to its contents.
    """
    return {
        'begin': r'\{',
        'beginCaptures': {
            '0': {'name': 'punctuation.section.block.dfasm'}
        },
        'end': r'\}',
        'endCaptures': {
            '0': {'name': 'punctuation.section.block.dfasm'}
        },
        'name': 'meta.function.body.dfasm',
        'patterns': [
            {'include': '$self'}
        ]
    }


def build_qualified_ref_pattern(sigil: str, ref_type: str) -> Tuple[str, Dict[str, Any]]:
    """
    Build a single regex pattern with captures for qualified refs.

    Patterns:
    - @ for nodes: @name|placement:port
    - & for labels: &name|placement:port
    - $ for functions: $name|placement:port

    Args:
        sigil: One of '@', '&' or '$'.
        ref_type: Descriptive name of the reference kind. It is not used to
            build the result (the scope is derived from ``sigil``); it is
            kept so existing callers keep working.

    Returns:
        (pattern, captures_dict)

    Raises:
        KeyError: ``sigil`` is not one of '@', '&' or '$'.
    """
    identifier = r'[a-zA-Z_][a-zA-Z0-9_]*'
    hex_lit = r'0x[0-9a-fA-F]+'
    dec_lit = r'[0-9]+'

    # Pattern captures:
    # 1: sigil
    # 2: name
    # 3: placement separator (|) [optional]
    # 4: placement name [optional]
    # 5: port separator (:) [optional]
    # 6: port (identifier or number) [optional]
    pattern = (
        f"({re.escape(sigil)})({identifier})"
        f"(?:(\\|)({identifier}))?"
        f"(?:(:)({identifier}|{hex_lit}|{dec_lit}))?"
    )

    scope_prefix = {
        '@': 'node',
        '&': 'label',
        '$': 'function'
    }
    scope_type = scope_prefix.get(sigil, 'ref')

    name_scope = {
        '@': 'entity.name.tag.dfasm',
        '&': 'entity.name.label.dfasm',
        '$': 'entity.name.function.dfasm',
    }[sigil]

    captures = {
        '1': {'name': degrade_scope(f'punctuation.definition.reference.{scope_type}.dfasm')},
        '2': {'name': degrade_scope(name_scope)},
        '3': {'name': degrade_scope('punctuation.separator.placement.dfasm')},
        '4': {'name': degrade_scope('entity.other.attribute-name.placement.dfasm')},
        '5': {'name': degrade_scope('punctuation.separator.port.dfasm')},
        '6': {'name': degrade_scope('constant.numeric.dfasm')},
    }

    return pattern, captures


def _convert_include(target: Any) -> Any:
    """Translate a Sublime ``include`` target into its TextMate spelling."""
    if not isinstance(target, str):
        return target
    if target in ('main', '#main', '$self'):
        # Including the top-level context means "the whole grammar".
        return '$self'
    if target.startswith(('#', '$')):
        # Already a TextMate-style reference.
        return target
    if target.startswith('scope:'):
        # Sublime "scope:source.x" corresponds to a bare scope name in TextMate.
        return target[len('scope:'):]
    if '/' in target or target.endswith('.sublime-syntax'):
        # File-path include: no TextMate equivalent, pass through unchanged.
        return target
    # A plain context name becomes a repository reference.
    return f'#{target}'


def _build_pushed_pattern(match_val: Any, push_stack: Any) -> Optional[Dict[str, Any]]:
    """Return a begin/end pattern for a known ``match`` + ``push`` pair, else None."""
    if match_val == 'r"' and push_stack == 'raw_string':
        return build_string_pattern('"', 'r', escaped=False)
    if match_val == 'b"' and push_stack == 'byte_string':
        return build_string_pattern('"', 'b', escaped=True)
    if match_val == '"' and push_stack == 'string':
        return build_string_pattern('"', '', escaped=True)
    if match_val == "'" and push_stack == 'char_literal':
        return build_string_pattern("'", '', escaped=True)
    if match_val == '\\{' and push_stack == 'func_body':
        return build_func_body_pattern()
    return None


def _process_pattern(pattern_dict: Dict, variables: Dict[str, str]) -> Optional[Dict]:
    """Convert one Sublime rule into a TextMate pattern, or None to drop it."""
    # Skip empty-match patterns (from pop: true).
    if pattern_dict.get('match') == '':
        return None

    # Context-based rules are converted to begin/end patterns.
    pushed = _build_pushed_pattern(pattern_dict.get('match', ''), pattern_dict.get('push'))
    if pushed is not None:
        return pushed

    result = {}
    for key, value in pattern_dict.items():
        if key == 'match':
            result[key] = inline_variables(value, variables)
        elif key == 'scope':
            remapped = degrade_scope(value)
            if remapped:
                result['name'] = remapped
        elif key == 'captures':
            processed_captures = {}
            for cap_idx, cap_value in value.items():
                if isinstance(cap_value, dict):
                    scope_name = degrade_scope(cap_value.get('name', ''))
                else:
                    scope_name = degrade_scope(cap_value)
                if scope_name:
                    # YAML yields integer keys; TextMate capture keys are strings.
                    processed_captures[str(cap_idx)] = {'name': scope_name}
            if processed_captures:
                result['captures'] = processed_captures
        elif key == 'meta_scope':
            remapped = degrade_scope(value)
            if remapped:
                result['contentName'] = remapped
        elif key == 'include':
            result[key] = _convert_include(value)
        elif key in ('push', 'set', 'pop'):
            # Context stack directives have no TextMate counterpart.
            pass
        else:
            result[key] = value

    return result if result else None


def _included_contexts(patterns: List[Dict]) -> List[str]:
    """List the repository names referenced by ``#name`` includes in ``patterns``."""
    return [
        pattern['include'][1:]
        for pattern in patterns
        if isinstance(pattern.get('include'), str) and pattern['include'].startswith('#')
    ]


def build_textmate_patterns(contexts: Dict[str, List[Dict]], variables: Dict[str, str]) -> Tuple[List[Dict], Dict[str, Dict]]:
    """
    Convert sublime-syntax contexts to TextMate patterns and repository.

    The ``main`` context becomes the top-level pattern list. Every other
    context that is reachable through ``include`` rules (directly from
    ``main`` or through another included context) is emitted into the
    repository; unreferenced contexts are dropped.

    Args:
        contexts: Mapping of context name to its list of rules.
        variables: Sublime ``variables`` mapping used to expand ``{{name}}``.

    Returns:
        (top_level_patterns, repository)
    """
    def convert_context(items: Optional[List[Dict]]) -> List[Dict]:
        converted = []
        for item in items or []:
            if isinstance(item, dict):
                processed = _process_pattern(item, variables)
                if processed:
                    converted.append(processed)
        return converted

    patterns = convert_context(contexts.get('main'))

    # Walk the include graph so contexts referenced only from other
    # repository entries are not left dangling.
    converted_contexts: Dict[str, List[Dict]] = {}
    pending = _included_contexts(patterns)
    while pending:
        ctx_name = pending.pop()
        if ctx_name == 'main' or ctx_name in converted_contexts or ctx_name not in contexts:
            continue
        converted_contexts[ctx_name] = convert_context(contexts[ctx_name])
        pending.extend(_included_contexts(converted_contexts[ctx_name]))

    # Emit in the order the contexts appear in the source grammar.
    repository = {
        ctx_name: {'patterns': converted_contexts[ctx_name]}
        for ctx_name in contexts
        if converted_contexts.get(ctx_name)
    }

    return patterns, repository


SCOPE_REMAP = {
    # Port numbers degrade to generic numeric (TextMate has no context stacks)
    'constant.numeric.port.dfasm': 'constant.numeric.dfasm',
    # Opcodes → support.function for cyan colour in most themes (distinct from keyword)
    'keyword.other.opcode.dfasm': 'support.function.opcode.dfasm',
    # Pragma → variable scope for distinct colour from both keywords and opcodes
    'keyword.control.pragma.dfasm': 'variable.language.pragma.dfasm',
    # Labels → entity.name.function for strong blue/green colour
    'entity.name.label.dfasm': 'entity.name.function.label.dfasm',
    # Nodes and functions → entity.name.type for visible colour (cyan/green in most themes)
    'entity.name.tag.dfasm': 'entity.name.type.node.dfasm',
    'entity.name.function.dfasm': 'entity.name.type.function.dfasm',
    # Named params → entity.other.attribute-name for distinct attribute colour
    'variable.parameter.dfasm': 'entity.other.attribute-name.dfasm',
    # Assignment operator → no special colour (default punctuation)
    'keyword.operator.assignment.dfasm': '',
}


def degrade_scope(scope: str) -> str:
    """Remap scopes for better colour differentiation in TextMate themes.

    Returns the ``SCOPE_REMAP`` entry for ``scope`` (which may be the empty
    string, meaning "emit no scope"), or ``scope`` itself when it has no entry.
    """
    return SCOPE_REMAP.get(scope, scope)


def build_textmate_grammar(sublime_syntax: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert sublime-syntax to TextMate grammar structure.

    Args:
        sublime_syntax: Parsed sublime-syntax document.

    Returns:
        Dict with the ``scopeName``, ``name``, ``fileTypes``, ``patterns`` and
        ``repository`` keys of a TextMate grammar.
    """
    # Extract metadata
    name = sublime_syntax.get('name', 'dfasm')
    file_extensions = sublime_syntax.get('file_extensions', [])
    scope = sublime_syntax.get('scope', 'source.dfasm')
    variables = sublime_syntax.get('variables', {})
    contexts = sublime_syntax.get('contexts', {})

    # Build patterns and repository from contexts
    top_patterns, repository = build_textmate_patterns(contexts, variables)

    # Bare sigil rules are superseded by the qualified reference patterns
    # built below, so drop them from the converted list.
    filtered_patterns = [
        pattern for pattern in top_patterns
        if pattern.get('match') not in ('@', '&', r'\$')
    ]

    # One qualified reference pattern per sigil.
    qualified_ref_patterns = []
    for sigil, ref_type in [('@', 'node'), ('&', 'label'), ('$', 'function')]:
        pattern_str, captures_dict = build_qualified_ref_pattern(sigil, ref_type)
        qualified_ref_patterns.append({
            'match': pattern_str,
            'captures': captures_dict
        })

    # TextMate grammar structure; qualified refs go first so they win over
    # the generic rules.
    grammar = {
        'scopeName': scope,
        'name': name,
        'fileTypes': file_extensions,
        'patterns': qualified_ref_patterns + filtered_patterns,
        'repository': repository
    }

    return grammar


def main():
    """Main entry point.

    Reads the input path from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``, falling back to the repository defaults, then writes the
    converted grammar as JSON.
    """
    # Parse arguments
    input_file = sys.argv[1] if len(sys.argv) > 1 else 'editor/sublime/dfasm.sublime-syntax'
    output_file = sys.argv[2] if len(sys.argv) > 2 else 'editor/textmate/dfasm.tmLanguage.json'

    # Make paths absolute if not already
    input_path = Path(input_file).resolve()
    output_path = Path(output_file).resolve()

    # Load sublime-syntax
    print(f"Loading {input_path}...")
    sublime_syntax = load_sublime_syntax(str(input_path))

    # Convert to TextMate
    print("Converting to TextMate format...")
    grammar = build_textmate_grammar(sublime_syntax)

    # Ensure output directory exists
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Write TextMate grammar
    print(f"Writing {output_path}...")
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(grammar, f, indent=2)

    print(f"✓ Conversion complete: {output_path}")


if __name__ == '__main__':
    main()