# radicle-reticulum/src/radicle_reticulum/git_bundle.py

"""Git bundle generation and application for Radicle repos.

Supports both full repository bundles and incremental bundles
containing only new commits since a known state.

Radicle stores data under these ref namespaces:
- refs/heads/* - Git branches
- refs/tags/* - Git tags
- refs/rad/id - Repository identity
- refs/rad/sigrefs - Signed refs
- refs/rad/cob/* - Collaborative Objects (issues, patches, etc.)
    - refs/rad/cob/xyz.issue/*
    - refs/rad/cob/xyz.patch/*
"""
import os
import subprocess
import tempfile
import hashlib
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple
import struct
import time
class BundleType(Enum):
    """Kind of Git bundle being transported."""

    # Bundle carrying the complete repository.
    FULL = "full"
    # Bundle carrying only new commits.
    INCREMENTAL = "incremental"
# Default ref patterns included when syncing a Radicle repository.
# NOTE(review): get_refs()/get_current_refs() hand these patterns to
# `git for-each-ref`; its glob matching may not let `*` cross a `/`, so
# nested names (e.g. refs/heads/feature/x) might be skipped -- confirm.
RADICLE_REF_PATTERNS = [
    "refs/heads/*",      # Git branches
    "refs/tags/*",       # Git tags
    "refs/rad/id",       # repository identity
    "refs/rad/sigrefs",  # signed refs
    "refs/rad/cob/*",    # collaborative objects (issues, patches, ...)
]
@dataclass
class BundleMetadata:
    """Metadata about a Git bundle for transport.

    Binary layout produced by ``encode`` (network byte order, no padding)::

        u8    bundle type (1 = full, anything else = incremental)
        u16   repository id length, then that many UTF-8 bytes
        u16   source node length, then that many UTF-8 bytes
        u64   timestamp
        u32   size_bytes
        u16   number of refs, then per ref: u16 length + UTF-8 bytes
        u16   number of prerequisites, then per entry: u16 length + UTF-8 bytes
        32    checksum bytes
    """

    bundle_type: BundleType
    repository_id: str  # Radicle repo ID (rad:z...)
    source_node: str  # DID of source node
    timestamp: int  # Unix timestamp ms
    refs_included: List[str]  # List of refs in bundle
    prerequisites: List[str]  # Commits required (for incremental)
    size_bytes: int
    checksum: bytes  # SHA-256 of bundle data

    @staticmethod
    def _pack_strings(items: List[str]) -> bytes:
        """Serialize strings as consecutive ``u16 length + UTF-8 bytes`` records."""
        chunks = []
        for item in items:
            raw = item.encode("utf-8")
            # The prefix must be the *byte* length: for non-ASCII text the
            # character count is smaller and would truncate the payload.
            chunks.append(struct.pack(f"!H{len(raw)}s", len(raw), raw))
        return b"".join(chunks)

    @staticmethod
    def _read_string(data: bytes, offset: int) -> Tuple[str, int]:
        """Read one ``u16 length + UTF-8 bytes`` record; return (text, new_offset)."""
        (length,) = struct.unpack_from("!H", data, offset)
        start = offset + 2
        end = start + length
        if end > len(data):
            # Slicing would silently return a shortened string here.
            raise ValueError("Truncated bundle metadata")
        return data[start:end].decode("utf-8"), end

    def encode(self) -> bytes:
        """Encode metadata to bytes.

        Raises:
            struct.error: If a value does not fit its fixed-width field
                (for example a string longer than 65535 bytes).
        """
        repo_bytes = self.repository_id.encode("utf-8")
        node_bytes = self.source_node.encode("utf-8")
        type_code = 1 if self.bundle_type == BundleType.FULL else 2
        return b"".join((
            struct.pack(
                f"!BH{len(repo_bytes)}s", type_code, len(repo_bytes), repo_bytes
            ),
            struct.pack(f"!H{len(node_bytes)}s", len(node_bytes), node_bytes),
            struct.pack("!QI", self.timestamp, self.size_bytes),
            struct.pack("!H", len(self.refs_included)),
            self._pack_strings(self.refs_included),
            struct.pack("!H", len(self.prerequisites)),
            self._pack_strings(self.prerequisites),
            # "32s" pads or truncates to exactly 32 bytes.
            struct.pack("!32s", self.checksum),
        ))

    @classmethod
    def decode(cls, data: bytes) -> Tuple["BundleMetadata", int]:
        """Decode metadata from bytes. Returns (metadata, bytes_consumed).

        Raises:
            struct.error: If ``data`` ends inside a fixed-width field.
            ValueError: If ``data`` ends inside a string or the checksum
                (this includes ``UnicodeDecodeError`` for invalid UTF-8).
        """
        offset = 0

        # Bundle type: 1 is FULL, every other value is read as INCREMENTAL.
        (bundle_type_raw,) = struct.unpack_from("!B", data, offset)
        bundle_type = (
            BundleType.FULL if bundle_type_raw == 1 else BundleType.INCREMENTAL
        )
        offset += 1

        repository_id, offset = cls._read_string(data, offset)
        source_node, offset = cls._read_string(data, offset)

        # Timestamp (u64) and size (u32)
        timestamp, size_bytes = struct.unpack_from("!QI", data, offset)
        offset += 12

        # Refs
        (refs_count,) = struct.unpack_from("!H", data, offset)
        offset += 2
        refs_included = []
        for _ in range(refs_count):
            ref, offset = cls._read_string(data, offset)
            refs_included.append(ref)

        # Prerequisites
        (prereq_count,) = struct.unpack_from("!H", data, offset)
        offset += 2
        prerequisites = []
        for _ in range(prereq_count):
            prereq, offset = cls._read_string(data, offset)
            prerequisites.append(prereq)

        # Checksum
        checksum_end = offset + 32
        if checksum_end > len(data):
            raise ValueError("Truncated bundle metadata")
        checksum = data[offset:checksum_end]
        offset = checksum_end

        return cls(
            bundle_type=bundle_type,
            repository_id=repository_id,
            source_node=source_node,
            timestamp=timestamp,
            refs_included=refs_included,
            prerequisites=prerequisites,
            size_bytes=size_bytes,
            checksum=checksum,
        ), offset
@dataclass
class GitBundle:
    """A Git bundle with metadata for transport.

    Transport format: a 4-byte big-endian metadata length, the encoded
    metadata, then the raw bundle bytes.
    """

    metadata: BundleMetadata
    data: bytes

    def encode(self) -> bytes:
        """Encode bundle with metadata for transport."""
        meta_bytes = self.metadata.encode()
        return b"".join((struct.pack("!I", len(meta_bytes)), meta_bytes, self.data))

    @classmethod
    def decode(cls, data: bytes) -> "GitBundle":
        """Decode bundle from transport format.

        Raises:
            struct.error: If ``data`` is shorter than the 4-byte length prefix.
            ValueError: If the declared metadata length exceeds the input or
                the SHA-256 of the payload does not match the metadata.
        """
        (meta_len,) = struct.unpack("!I", data[:4])
        meta_end = 4 + meta_len
        if meta_end > len(data):
            # Without this check the metadata slice is silently shortened.
            raise ValueError("Bundle truncated: metadata length exceeds input")
        metadata, _ = BundleMetadata.decode(data[4:meta_end])
        bundle_data = data[meta_end:]

        # Verify checksum
        if hashlib.sha256(bundle_data).digest() != metadata.checksum:
            raise ValueError("Bundle checksum mismatch")
        return cls(metadata=metadata, data=bundle_data)

    def save(self, path: Path) -> None:
        """Save bundle data (without the metadata header) to file."""
        Path(path).write_bytes(self.data)

    @property
    def size(self) -> int:
        """Get total size including metadata."""
        # Same value as len(self.encode()), without copying the payload.
        return 4 + len(self.metadata.encode()) + len(self.data)
class GitBundleGenerator:
    """Generates Git bundles from repositories."""

    # Characters allowed in an object id accepted as an exclusion.
    _HEX_DIGITS = frozenset("0123456789abcdef")

    def __init__(self, repo_path: Path):
        """Initialize with path to Git repository.

        Raises:
            ValueError: If the path contains neither a ``.git`` entry nor a
                ``HEAD`` file.
        """
        self.repo_path = Path(repo_path)
        has_git_dir = (self.repo_path / ".git").exists()
        if not has_git_dir and not (self.repo_path / "HEAD").exists():
            raise ValueError(f"Not a Git repository: {repo_path}")

    def _run_git(self, *args: str, check: bool = True) -> subprocess.CompletedProcess:
        """Run a git command in the repository and capture text output."""
        return subprocess.run(
            ["git", *args],
            cwd=self.repo_path,
            capture_output=True,
            text=True,
            check=check,
            # create_incremental_bundle() matches an English stderr message,
            # so git's messages must not be translated.
            env={**os.environ, "LC_ALL": "C"},
        )

    def _run_git_binary(self, *args: str) -> bytes:
        """Run a git command and return binary output."""
        result = subprocess.run(
            ["git", *args],
            cwd=self.repo_path,
            capture_output=True,
            check=True,
        )
        return result.stdout

    def get_refs(self, patterns: Optional[List[str]] = None) -> Dict[str, str]:
        """Get refs matching patterns. Returns {ref_name: commit_sha}.

        Patterns default to ``RADICLE_REF_PATTERNS``. A pattern for which
        ``git for-each-ref`` fails is skipped.
        """
        if patterns is None:
            patterns = RADICLE_REF_PATTERNS
        refs: Dict[str, str] = {}
        for pattern in patterns:
            result = self._run_git(
                "for-each-ref", "--format=%(refname) %(objectname)", pattern,
                check=False,
            )
            if result.returncode != 0:
                continue
            for line in result.stdout.splitlines():
                parts = line.split()
                if len(parts) == 2:
                    refs[parts[0]] = parts[1]
        return refs

    def get_radicle_repo_id(self) -> Optional[str]:
        """Get Radicle repository ID if this is a Radicle repo.

        Returns the ``rad.id`` git config value when ``.git/rad`` exists and
        the value is set, otherwise None.
        """
        rad_dir = self.repo_path / ".git" / "rad"
        if not rad_dir.exists():
            return None
        try:
            result = self._run_git("config", "--get", "rad.id", check=False)
        except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
            # Best effort: an unusable git is treated as "no Radicle ID".
            return None
        if result.returncode == 0:
            return result.stdout.strip()
        return None

    def _known_objects(self, shas: List[str]) -> List[str]:
        """Return the unique full-length hex ids from shas that exist locally."""
        candidates = [
            sha
            for sha in dict.fromkeys(shas)  # de-duplicate, keep order
            if sha and len(sha) in (40, 64) and self._HEX_DIGITS.issuperset(sha)
        ]
        if not candidates:
            return []
        probe = subprocess.run(
            ["git", "cat-file", "--batch-check"],
            cwd=self.repo_path,
            input="".join(f"{sha}\n" for sha in candidates),
            capture_output=True,
            text=True,
            check=True,
        )
        known = []
        # One output line per input line: "<oid> <type> <size>" when the
        # object exists, "<oid> missing" otherwise.
        for sha, line in zip(candidates, probe.stdout.splitlines()):
            fields = line.split()
            if len(fields) == 3 and fields[0] == sha:
                known.append(sha)
        return known

    def _package(
        self,
        bundle_type: BundleType,
        repository_id: str,
        source_node: str,
        refs: List[str],
        prerequisites: List[str],
        bundle_data: bytes,
        output_path: Optional[Path],
    ) -> GitBundle:
        """Wrap raw bundle bytes in a GitBundle and optionally save them."""
        metadata = BundleMetadata(
            bundle_type=bundle_type,
            repository_id=repository_id,
            source_node=source_node,
            timestamp=int(time.time() * 1000),
            refs_included=refs,
            prerequisites=prerequisites,
            size_bytes=len(bundle_data),
            checksum=hashlib.sha256(bundle_data).digest(),
        )
        bundle = GitBundle(metadata=metadata, data=bundle_data)
        if output_path:
            bundle.save(output_path)
        return bundle

    def create_full_bundle(
        self,
        repository_id: str,
        source_node: str,
        output_path: Optional[Path] = None,
        ref_patterns: Optional[List[str]] = None,
    ) -> GitBundle:
        """Create a full bundle containing all refs.

        Args:
            repository_id: Radicle repo ID (rad:z...)
            source_node: DID of the source node
            output_path: Optional path to write bundle file
            ref_patterns: Ref patterns to include (default: Radicle patterns)

        Raises:
            ValueError: If no ref matches the patterns.
            subprocess.CalledProcessError: If ``git bundle create`` fails.
        """
        refs = self.get_refs(ref_patterns)
        if not refs:
            raise ValueError("No refs to bundle")

        # Reserve a path for git to write to; delete=False keeps the file
        # after the handle is closed.
        with tempfile.NamedTemporaryFile(suffix=".bundle", delete=False) as f:
            bundle_path = f.name
        try:
            self._run_git("bundle", "create", bundle_path, *refs)
            bundle_data = Path(bundle_path).read_bytes()
        finally:
            os.unlink(bundle_path)

        return self._package(
            BundleType.FULL,
            repository_id,
            source_node,
            list(refs),
            [],
            bundle_data,
            output_path,
        )

    def create_incremental_bundle(
        self,
        repository_id: str,
        source_node: str,
        basis_refs: Dict[str, str],
        output_path: Optional[Path] = None,
        ref_patterns: Optional[List[str]] = None,
    ) -> Optional[GitBundle]:
        """Create an incremental bundle with only new commits.

        Args:
            repository_id: Radicle repo ID
            source_node: DID of the source node
            basis_refs: Known refs at destination {ref_name: commit_sha}
            output_path: Optional path to write bundle file
            ref_patterns: Ref patterns to include

        Returns:
            GitBundle if there are changes, None if no changes

        Raises:
            subprocess.CalledProcessError: If git fails for any reason other
                than the bundle being empty.
        """
        current_refs = self.get_refs(ref_patterns)
        if not current_refs:
            return None

        # Refs that are new or point somewhere else than at the destination.
        changed_refs = {
            ref: sha
            for ref, sha in current_refs.items()
            if basis_refs.get(ref) != sha
        }
        if not changed_refs:
            return None  # No changes

        # Only objects present in this repository can be excluded; an id
        # that is unknown here would make `git bundle create` abort.
        known_basis = self._known_objects(list(basis_refs.values()))
        exclusions = [f"^{sha}" for sha in known_basis]

        with tempfile.NamedTemporaryFile(suffix=".bundle", delete=False) as f:
            bundle_path = f.name
        try:
            result = self._run_git(
                "bundle", "create", bundle_path, *changed_refs, *exclusions,
                check=False,
            )
            if result.returncode != 0:
                # git refuses to write a bundle when every commit is excluded.
                if "empty bundle" in result.stderr.lower():
                    return None
                raise subprocess.CalledProcessError(
                    result.returncode,
                    "git bundle create",
                    output=result.stdout,
                    stderr=result.stderr,
                )
            bundle_data = Path(bundle_path).read_bytes()
        finally:
            if os.path.exists(bundle_path):
                os.unlink(bundle_path)

        return self._package(
            BundleType.INCREMENTAL,
            repository_id,
            source_node,
            list(changed_refs),
            known_basis,
            bundle_data,
            output_path,
        )
class GitBundleApplicator:
    """Applies Git bundles to repositories."""

    def __init__(self, repo_path: Path):
        """Initialize with path to Git repository."""
        self.repo_path = Path(repo_path)

    def _run_git(self, *args: str, check: bool = True) -> subprocess.CompletedProcess:
        """Run a git command in the repository and capture text output."""
        return subprocess.run(
            ["git", *args],
            cwd=self.repo_path,
            capture_output=True,
            text=True,
            check=check,
        )

    @staticmethod
    def _write_temp_bundle(data: bytes) -> str:
        """Write data to a new temporary .bundle file and return its path."""
        handle = tempfile.NamedTemporaryFile(suffix=".bundle", delete=False)
        try:
            with handle:
                handle.write(data)
        except BaseException:
            # delete=False means a failed write would otherwise leak the file.
            os.unlink(handle.name)
            raise
        return handle.name

    def _verify_bundle_file(self, bundle_path: str) -> Tuple[bool, str]:
        """Run ``git bundle verify`` on a file. Returns (success, message)."""
        result = self._run_git("bundle", "verify", bundle_path, check=False)
        if result.returncode == 0:
            return True, "Bundle verified successfully"
        return False, result.stderr.strip()

    def verify_bundle(self, bundle: GitBundle) -> Tuple[bool, str]:
        """Verify a bundle can be applied.

        Returns (success, message); on failure the message is git's stderr.
        """
        bundle_path = self._write_temp_bundle(bundle.data)
        try:
            return self._verify_bundle_file(bundle_path)
        finally:
            os.unlink(bundle_path)

    def apply_bundle(self, bundle: GitBundle, fetch_all: bool = True) -> Dict[str, str]:
        """Apply a bundle to the repository.

        Args:
            bundle: The GitBundle to apply
            fetch_all: If True, fetch every ref of the bundle into the ref of
                the same name; if False, run a plain ``git fetch`` of the
                bundle file.

        Returns:
            Dict of applied refs {ref_name: commit_sha}. With ``fetch_all``
            only refs whose fetch succeeded are included; otherwise all refs
            listed by the bundle are returned.

        Raises:
            ValueError: If ``git bundle verify`` rejects the bundle.
            subprocess.CalledProcessError: If listing the bundle heads or the
                plain fetch fails.
        """
        bundle_path = self._write_temp_bundle(bundle.data)
        try:
            # Verify first, reusing the file that is fetched from below.
            ok, msg = self._verify_bundle_file(bundle_path)
            if not ok:
                raise ValueError(f"Bundle verification failed: {msg}")

            # List refs in bundle; each line is "<sha> <refname>".
            listing = self._run_git("bundle", "list-heads", bundle_path)
            bundle_refs: Dict[str, str] = {}
            for line in listing.stdout.splitlines():
                parts = line.split()
                if len(parts) >= 2:
                    bundle_refs[parts[1]] = parts[0]

            if not fetch_all:
                self._run_git("fetch", bundle_path)
                return bundle_refs

            applied: Dict[str, str] = {}
            for ref, sha in bundle_refs.items():
                if ref.startswith("-"):
                    # Would be parsed by git as an option, not as a refspec.
                    continue
                fetched = self._run_git(
                    "fetch", bundle_path, f"{ref}:{ref}", check=False
                )
                # A failed fetch leaves the ref untouched, so it is not
                # reported as applied.
                if fetched.returncode == 0:
                    applied[ref] = sha
            return applied
        finally:
            os.unlink(bundle_path)

    def get_current_refs(self, patterns: Optional[List[str]] = None) -> Dict[str, str]:
        """Get current refs for computing incremental basis.

        Patterns default to ``RADICLE_REF_PATTERNS``. Returns
        {ref_name: object_sha}; a pattern for which ``git for-each-ref``
        fails is skipped.
        """
        if patterns is None:
            patterns = RADICLE_REF_PATTERNS
        refs: Dict[str, str] = {}
        for pattern in patterns:
            result = self._run_git(
                "for-each-ref", "--format=%(refname) %(objectname)", pattern,
                check=False,
            )
            if result.returncode != 0:
                continue
            for line in result.stdout.splitlines():
                parts = line.split()
                if len(parts) == 2:
                    refs[parts[0]] = parts[1]
        return refs
def estimate_bundle_size(repo_path: Path, ref_patterns: Optional[List[str]] = None) -> int:
    """Estimate the size of a full bundle without creating it.

    The estimate is the on-disk size of the repository's loose and packed
    objects as reported by ``git count-objects -v``.

    Args:
        repo_path: Directory in which git is run.
        ref_patterns: Accepted for interface compatibility; not used.

    Returns:
        Estimated size in bytes, or 0 if git reports no object sizes
        (for example because ``repo_path`` is not a repository).
    """
    result = subprocess.run(
        ["git", "count-objects", "-v"],
        cwd=repo_path,
        capture_output=True,
        text=True,
    )
    total_kib = 0
    for line in result.stdout.splitlines():
        key, sep, value = line.partition(":")
        # "size" covers loose objects, "size-pack" the packs; both are
        # reported in KiB. Counting only packs yields 0 for a repository
        # that has never been repacked.
        if sep and key in ("size", "size-pack") and value.strip().isdigit():
            total_kib += int(value.strip())
    return total_kib * 1024