vignettes/advanced-usage.Rmd
advanced-usage.Rmd

The segmentation behavior can be controlled by several parameters:
# More sensitive segmentation (more segments).
# segmentation_gamma is the penalty for starting a new segment, so a LOWER
# value produces more segments and a HIGHER value produces fewer.
battenberg(
  # ... other parameters ...
  segmentation_gamma = 5, # Lower penalty = more segments
  segmentation_kmin = 1,  # Minimum segment size
  phasing_kmin = 1        # Minimum size for phasing
)

# Less sensitive segmentation (fewer segments).
battenberg(
  # ... other parameters ...
  segmentation_gamma = 25, # Higher penalty = fewer segments
  segmentation_kmin = 5,   # Larger minimum segment size
  phasing_kmin = 3         # Larger minimum size for phasing
)

Adjust expected ranges based on sample characteristics:
# Sample expected to have high purity
battenberg(
  # ... other parameters ...
  min_rho      = 0.8,  # purity of at least 80%
  max_ploidy   = 6.0,  # permit higher ploidy solutions
  min_goodness = 0.65  # stricter goodness-of-fit cutoff
)

# Sample expected to have low purity
battenberg(
  # ... other parameters ...
  min_rho      = 0.3,  # purity may be as low as 30%
  max_ploidy   = 3.5,  # narrower ploidy range
  min_goodness = 0.55  # more lenient goodness-of-fit cutoff
)
# Stricter quality-control thresholds
battenberg(
  # ... other parameters ...
  min_normal_depth            = 15,   # require higher coverage
  min_base_qual               = 25,   # require higher base quality
  min_map_qual                = 40,   # require higher mapping quality
  uninformative_BAF_threshold = 0.49  # stricter BAF threshold
)

Battenberg can incorporate prior breakpoints from structural variant calls:
# Create prior breakpoints file (2 columns: chr, pos).
# Positions are integer literals (L suffix): write.table() renders doubles
# such as 5000000 as "5e+06", whereas integers are always written in full.
# NOTE(review): the file is written without a header row -- confirm that this
# matches what battenberg() expects for prior_breakpoints_file.
prior_breakpoints <- data.frame(
  chr = c("1", "1", "2", "3"),
  pos = c(1500000L, 2500000L, 5000000L, 1000000L)
)
write.table(
  prior_breakpoints,
  file = "prior_breakpoints.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)
# Pass the breakpoints file to Battenberg
battenberg(
  # ... other parameters ...
  prior_breakpoints_file = "prior_breakpoints.txt"
)

For improved phasing, especially with newer reference panels:
# Beagle5 inputs. The template paths contain the literal token CHROMNAME
# (presumably substituted per chromosome -- confirm against battenberg()).
BEAGLEJAR <- "path/to/beagle.24Aug19.3e8.jar"
BEAGLEREF_TEMPLATE <-
  "path/to/beagle_ref_chrCHROMNAME.1kg.phase3.v5a.b37.bref3"
BEAGLEPLINK_TEMPLATE <- "path/to/plink.chrCHROMNAME.GRCh37.map"

battenberg(
  # ... other parameters ...
  usebeagle      = TRUE,
  beaglejar      = BEAGLEJAR,
  beagleref      = BEAGLEREF_TEMPLATE,
  beagleplink    = BEAGLEPLINK_TEMPLATE,
  beaglemaxmem   = 16,  # memory in GB
  beaglenthreads = 4,   # threads used by Beagle
  beaglewindow   = 40,  # window size
  beagleoverlap  = 4    # overlap size
)

For analyzing multiple samples together:
# Sample names; the BAM paths below are derived from them so that names and
# files are guaranteed to stay in the same order.
tumournames <- c("sample1_tumor", "sample2_tumor", "sample3_tumor")
normalnames <- c("sample1_normal", "sample2_normal", "sample3_normal")
tumourbams <- file.path("path/to", paste0(tumournames, ".bam"))
normalbams <- file.path("path/to", paste0(normalnames, ".bam"))

# Multisample run: one entry per sample in each vector
battenberg(
  tumourname       = tumournames,
  normalname       = normalnames,
  tumour_data_file = tumourbams,
  normal_data_file = normalbams,
  # ... other parameters ...
  multisample_maxlag = 150,                   # max upstream SNPs for multisample phasing
  multisample_relative_weight_balanced = 0.5, # weight for balanced samples
  write_battenberg_phasing = TRUE             # write phasing results
)

For cell line data (tumor-only analysis):
# Tumour-only run: no matched normal is supplied
battenberg(
  analysis         = "cell_line",  # instead of the default "paired"
  tumourname       = "cell_line_sample",
  normalname       = NA,           # no normal sample
  tumour_data_file = "path/to/cell_line.bam",
  normal_data_file = NA,           # no normal BAM
  # ... other parameters adjusted for cell line analysis ...
  min_rho          = 0.95,         # high purity expected
  min_goodness     = 0.7           # stricter goodness of fit for cell lines
)

For SNP6 array data:
# SNP6 array input: array-specific reference file and external executables
battenberg(
  # ... other parameters ...
  data_type                  = "snp6",
  platform_gamma             = 1,
  snp6_reference_info_file   = "path/to/snp6_reference_info.txt",
  apt.probeset.genotype.exe  = "apt-probeset-genotype",
  apt.probeset.summarize.exe = "apt-probeset-summarize",
  norm.geno.clust.exe        = "normalize_affy_geno_cluster.pl",
  birdseed_report_file       = "birdseed.report.txt"
)
# Speed up processing by using more threads
battenberg(
  # ... other parameters ...
  nthreads       = 16,  # 16 CPU cores
  beaglenthreads = 8    # 8 cores for Beagle (if using)
)
# For large datasets, adjust memory settings.
# Consider running chromosomes separately for very large files.
battenberg(
  # ... other parameters ...
  beaglemaxmem = 32 # 32GB for Beagle (last argument: no trailing comma)
)

For rerunning parts of the analysis:
# Allele counts already exist: skip only the allele-counting step
battenberg(
  # ... other parameters ...
  skip_allele_counting = TRUE,
  skip_preprocessing   = FALSE,
  skip_phasing         = FALSE
)

# Rerun: skip only the preprocessing step
battenberg(
  # ... other parameters ...
  skip_allele_counting = FALSE,
  skip_preprocessing   = TRUE,
  skip_phasing         = FALSE
)

For different reference genomes:
# Specify genome build.
# Ensure reference files match the specified build.
battenberg(
  # ... other parameters ...
  GENOMEBUILD = "hg38" # or "hg19" (last argument: no trailing comma)
)

Using external phasing information:
# Supply externally derived haplotypes and write out the phasing results
battenberg(
  # ... other parameters ...
  externalhaplotypefile = "path/to/external_haplotypes.vcf",
  write_battenberg_phasing = TRUE
)

- Adjust `min_normal_depth` and quality thresholds
- Adjust `min_goodness` to be more lenient

After running Battenberg, assess quality using:
# Example quality check
cn_data <- read.delim("sample_tumor_copynumber.txt")

# Check for very short segments (potential artifacts).
# which() drops rows with NA coordinates; plain logical indexing would turn
# each of them into an all-NA row and inflate the count.
segment_length <- cn_data$endpos - cn_data$startpos
short_segments <- cn_data[which(segment_length < 1e6), ]
if (nrow(short_segments) > 0) {
  cat("Warning: Found", nrow(short_segments), "segments < 1Mb\n")
}

# Check purity estimate
rho_psi <- read.delim("sample_tumor_rho_and_psi.txt")
# NOTE(review): reads rho from the second row, as the original example did --
# confirm that this row holds the final purity estimate.
purity <- rho_psi$rho[2]
# Guard first: `if (NA < 0.3)` is an error, and a missing rho column gives NULL.
if (length(purity) != 1 || is.na(purity)) {
  cat("Warning: Purity estimate is missing\n")
} else if (purity < 0.3) {
  cat("Warning: Low estimated purity:", purity, "\n")
}