Create a new SeqOps pipeline
Input sequences (async iterable)
Filter sequences based on criteria
Remove sequences that don't meet specified criteria. All criteria within a single filter call are combined with AND logic.
Filter criteria or custom predicate
New SeqOps instance for chaining
Transform sequence content
Apply transformations that modify the sequence string itself.
Transform options
New SeqOps instance for chaining
Extract amplicons via primer sequences
Finds primer pairs within sequences and extracts the amplified regions. Supports mismatch tolerance, degenerate bases (IUPAC codes), windowed search for long-read performance, canonical matching for BED-extracted primers, and flexible region extraction. Provides complete seqkit amplicon parity with enhanced biological validation and type safety.
// Simple amplicon extraction (90% use case)
seqops(sequences)
.amplicon('ATCGATCG', 'CGATCGAT')
.writeFasta('amplicons.fasta');
// With mismatch tolerance (common case)
seqops(sequences)
.amplicon('ATCGATCG', 'CGATCGAT', 2)
.filter({ minLength: 50 });
// Single primer (auto-canonical matching)
seqops(sequences)
.amplicon('UNIVERSAL_PRIMER')
.stats();
// Real-world COVID-19 diagnostics
seqops(samples)
.quality({ minScore: 20 })
.amplicon(
primer`ACCAGGAACTAATCAGACAAG`, // N gene forward
primer`CAAAGACCAATCCTACCATGAG`, // N gene reverse
2 // Allow sequencing errors
)
.validate({ mode: 'strict' });
// Long reads with windowed search (massive performance boost)
seqops(nanoporeReads)
.amplicon('FORWARD', 'REVERSE', {
searchWindow: { forward: 200, reverse: 200 } // 100x+ speedup
});
// Advanced features (10% use case)
seqops(sequences)
.amplicon({
forwardPrimer: primer`ACCAGGAACTAATCAGACAAG`,
reversePrimer: primer`CAAAGACCAATCCTACCATGAG`,
maxMismatches: 3, // Long-read tolerance
canonical: true, // BED-extracted primers
flanking: true, // Include primer context
region: '-100:100', // Biological context
searchWindow: { forward: 200, reverse: 200 }, // Performance optimization
outputMismatches: true // Debug information
})
.rmdup('sequence')
.writeFasta('advanced_amplicons.fasta');
Extract amplicons via primer sequences
Finds primer pairs within sequences and extracts the amplified regions. Supports mismatch tolerance, degenerate bases (IUPAC codes), windowed search for long-read performance, canonical matching for BED-extracted primers, and flexible region extraction. Provides complete seqkit amplicon parity with enhanced biological validation and type safety.
// Simple amplicon extraction (90% use case)
seqops(sequences)
.amplicon('ATCGATCG', 'CGATCGAT')
.writeFasta('amplicons.fasta');
// With mismatch tolerance (common case)
seqops(sequences)
.amplicon('ATCGATCG', 'CGATCGAT', 2)
.filter({ minLength: 50 });
// Single primer (auto-canonical matching)
seqops(sequences)
.amplicon('UNIVERSAL_PRIMER')
.stats();
// Real-world COVID-19 diagnostics
seqops(samples)
.quality({ minScore: 20 })
.amplicon(
primer`ACCAGGAACTAATCAGACAAG`, // N gene forward
primer`CAAAGACCAATCCTACCATGAG`, // N gene reverse
2 // Allow sequencing errors
)
.validate({ mode: 'strict' });
// Long reads with windowed search (massive performance boost)
seqops(nanoporeReads)
.amplicon('FORWARD', 'REVERSE', {
searchWindow: { forward: 200, reverse: 200 } // 100x+ speedup
});
// Advanced features (10% use case)
seqops(sequences)
.amplicon({
forwardPrimer: primer`ACCAGGAACTAATCAGACAAG`,
reversePrimer: primer`CAAAGACCAATCCTACCATGAG`,
maxMismatches: 3, // Long-read tolerance
canonical: true, // BED-extracted primers
flanking: true, // Include primer context
region: '-100:100', // Biological context
searchWindow: { forward: 200, reverse: 200 }, // Performance optimization
outputMismatches: true // Debug information
})
.rmdup('sequence')
.writeFasta('advanced_amplicons.fasta');
Extract amplicons via primer sequences
Finds primer pairs within sequences and extracts the amplified regions. Supports mismatch tolerance, degenerate bases (IUPAC codes), windowed search for long-read performance, canonical matching for BED-extracted primers, and flexible region extraction. Provides complete seqkit amplicon parity with enhanced biological validation and type safety.
// Simple amplicon extraction (90% use case)
seqops(sequences)
.amplicon('ATCGATCG', 'CGATCGAT')
.writeFasta('amplicons.fasta');
// With mismatch tolerance (common case)
seqops(sequences)
.amplicon('ATCGATCG', 'CGATCGAT', 2)
.filter({ minLength: 50 });
// Single primer (auto-canonical matching)
seqops(sequences)
.amplicon('UNIVERSAL_PRIMER')
.stats();
// Real-world COVID-19 diagnostics
seqops(samples)
.quality({ minScore: 20 })
.amplicon(
primer`ACCAGGAACTAATCAGACAAG`, // N gene forward
primer`CAAAGACCAATCCTACCATGAG`, // N gene reverse
2 // Allow sequencing errors
)
.validate({ mode: 'strict' });
// Long reads with windowed search (massive performance boost)
seqops(nanoporeReads)
.amplicon('FORWARD', 'REVERSE', {
searchWindow: { forward: 200, reverse: 200 } // 100x+ speedup
});
// Advanced features (10% use case)
seqops(sequences)
.amplicon({
forwardPrimer: primer`ACCAGGAACTAATCAGACAAG`,
reversePrimer: primer`CAAAGACCAATCCTACCATGAG`,
maxMismatches: 3, // Long-read tolerance
canonical: true, // BED-extracted primers
flanking: true, // Include primer context
region: '-100:100', // Biological context
searchWindow: { forward: 200, reverse: 200 }, // Performance optimization
outputMismatches: true // Debug information
})
.rmdup('sequence')
.writeFasta('advanced_amplicons.fasta');
Extract amplicons via primer sequences
Finds primer pairs within sequences and extracts the amplified regions. Supports mismatch tolerance, degenerate bases (IUPAC codes), windowed search for long-read performance, canonical matching for BED-extracted primers, and flexible region extraction. Provides complete seqkit amplicon parity with enhanced biological validation and type safety.
// Simple amplicon extraction (90% use case)
seqops(sequences)
.amplicon('ATCGATCG', 'CGATCGAT')
.writeFasta('amplicons.fasta');
// With mismatch tolerance (common case)
seqops(sequences)
.amplicon('ATCGATCG', 'CGATCGAT', 2)
.filter({ minLength: 50 });
// Single primer (auto-canonical matching)
seqops(sequences)
.amplicon('UNIVERSAL_PRIMER')
.stats();
// Real-world COVID-19 diagnostics
seqops(samples)
.quality({ minScore: 20 })
.amplicon(
primer`ACCAGGAACTAATCAGACAAG`, // N gene forward
primer`CAAAGACCAATCCTACCATGAG`, // N gene reverse
2 // Allow sequencing errors
)
.validate({ mode: 'strict' });
// Long reads with windowed search (massive performance boost)
seqops(nanoporeReads)
.amplicon('FORWARD', 'REVERSE', {
searchWindow: { forward: 200, reverse: 200 } // 100x+ speedup
});
// Advanced features (10% use case)
seqops(sequences)
.amplicon({
forwardPrimer: primer`ACCAGGAACTAATCAGACAAG`,
reversePrimer: primer`CAAAGACCAATCCTACCATGAG`,
maxMismatches: 3, // Long-read tolerance
canonical: true, // BED-extracted primers
flanking: true, // Include primer context
region: '-100:100', // Biological context
searchWindow: { forward: 200, reverse: 200 }, // Performance optimization
outputMismatches: true // Debug information
})
.rmdup('sequence')
.writeFasta('advanced_amplicons.fasta');
Extract amplicons via primer sequences
Finds primer pairs within sequences and extracts the amplified regions. Supports mismatch tolerance, degenerate bases (IUPAC codes), windowed search for long-read performance, canonical matching for BED-extracted primers, and flexible region extraction. Provides complete seqkit amplicon parity with enhanced biological validation and type safety.
// Simple amplicon extraction (90% use case)
seqops(sequences)
.amplicon('ATCGATCG', 'CGATCGAT')
.writeFasta('amplicons.fasta');
// With mismatch tolerance (common case)
seqops(sequences)
.amplicon('ATCGATCG', 'CGATCGAT', 2)
.filter({ minLength: 50 });
// Single primer (auto-canonical matching)
seqops(sequences)
.amplicon('UNIVERSAL_PRIMER')
.stats();
// Real-world COVID-19 diagnostics
seqops(samples)
.quality({ minScore: 20 })
.amplicon(
primer`ACCAGGAACTAATCAGACAAG`, // N gene forward
primer`CAAAGACCAATCCTACCATGAG`, // N gene reverse
2 // Allow sequencing errors
)
.validate({ mode: 'strict' });
// Long reads with windowed search (massive performance boost)
seqops(nanoporeReads)
.amplicon('FORWARD', 'REVERSE', {
searchWindow: { forward: 200, reverse: 200 } // 100x+ speedup
});
// Advanced features (10% use case)
seqops(sequences)
.amplicon({
forwardPrimer: primer`ACCAGGAACTAATCAGACAAG`,
reversePrimer: primer`CAAAGACCAATCCTACCATGAG`,
maxMismatches: 3, // Long-read tolerance
canonical: true, // BED-extracted primers
flanking: true, // Include primer context
region: '-100:100', // Biological context
searchWindow: { forward: 200, reverse: 200 }, // Performance optimization
outputMismatches: true // Debug information
})
.rmdup('sequence')
.writeFasta('advanced_amplicons.fasta');
Clean and sanitize sequences
Fix common issues in sequence data such as gaps, ambiguous bases, and whitespace.
Clean options
New SeqOps instance for chaining
FASTQ quality operations
Filter and trim sequences based on quality scores. Only affects FASTQ sequences; FASTA sequences pass through unchanged.
Quality options
New SeqOps instance for chaining
Convert FASTQ quality score encodings
Convert quality scores between different encoding schemes (Phred+33, Phred+64, Solexa). Essential for legacy data processing and tool compatibility. Only affects FASTQ sequences; FASTA sequences pass through unchanged.
New SeqOps instance for chaining
// Primary workflow: Auto-detect source encoding (matches seqkit)
seqops(legacyData)
.convert({ targetEncoding: 'phred33' })
.writeFastq('modernized.fastq');
// Legacy Illumina 1.3-1.7 to modern standard
seqops(illumina15Data)
.convert({
sourceEncoding: 'phred64', // Skip detection for known encoding
targetEncoding: 'phred33' // Modern standard
})
// Real-world pipeline: QC → standardize encoding → analysis
const results = await seqops(mixedEncodingFiles)
.quality({ minScore: 20 }) // Filter first
.convert({ targetEncoding: 'phred33' }) // Standardize
.stats({ detailed: true });
Validate sequences
Check sequences for validity and optionally fix or reject invalid ones.
Validation options
New SeqOps instance for chaining
Search sequences by pattern
Pattern matching and filtering similar to Unix grep. Supports both simple string patterns and complex options for advanced use cases.
// Simple sequence search (most common case)
seqops(sequences)
.grep('ATCG') // Search sequences for 'ATCG'
.grep(/^chr\d+/, 'id') // Search IDs with regex
// Advanced options for complex scenarios
seqops(sequences)
.grep({
pattern: 'ATCGATCG',
target: 'sequence',
allowMismatches: 2,
searchBothStrands: true
})
Search sequences by pattern
Pattern matching and filtering similar to Unix grep. Supports both simple string patterns and complex options for advanced use cases.
// Simple sequence search (most common case)
seqops(sequences)
.grep('ATCG') // Search sequences for 'ATCG'
.grep(/^chr\d+/, 'id') // Search IDs with regex
// Advanced options for complex scenarios
seqops(sequences)
.grep({
pattern: 'ATCGATCG',
target: 'sequence',
allowMismatches: 2,
searchBothStrands: true
})
Search sequences by pattern
Pattern matching and filtering similar to Unix grep. Supports both simple string patterns and complex options for advanced use cases.
// Simple sequence search (most common case)
seqops(sequences)
.grep('ATCG') // Search sequences for 'ATCG'
.grep(/^chr\d+/, 'id') // Search IDs with regex
// Advanced options for complex scenarios
seqops(sequences)
.grep({
pattern: 'ATCGATCG',
target: 'sequence',
allowMismatches: 2,
searchBothStrands: true
})
Search sequences by pattern
Pattern matching and filtering similar to Unix grep. Supports both simple string patterns and complex options for advanced use cases.
// Simple sequence search (most common case)
seqops(sequences)
.grep('ATCG') // Search sequences for 'ATCG'
.grep(/^chr\d+/, 'id') // Search IDs with regex
// Advanced options for complex scenarios
seqops(sequences)
.grep({
pattern: 'ATCGATCG',
target: 'sequence',
allowMismatches: 2,
searchBothStrands: true
})
Search sequences by pattern
Pattern matching and filtering similar to Unix grep. Supports both simple string patterns and complex options for advanced use cases.
// Simple sequence search (most common case)
seqops(sequences)
.grep('ATCG') // Search sequences for 'ATCG'
.grep(/^chr\d+/, 'id') // Search IDs with regex
// Advanced options for complex scenarios
seqops(sequences)
.grep({
pattern: 'ATCGATCG',
target: 'sequence',
allowMismatches: 2,
searchBothStrands: true
})
Concatenate sequences from multiple sources
Combines sequences from multiple file paths and/or AsyncIterables with sophisticated ID conflict resolution. Maintains streaming behavior for memory efficiency with large datasets.
Array of file paths and/or AsyncIterables to concatenate
Optional
options: Omit<ConcatOptions, "sources"> — Concatenation options (optional)
New SeqOps instance for chaining
// Simple concatenation from files
seqops(sequences)
.concat(['file1.fasta', 'file2.fasta'])
.concat([anotherAsyncIterable])
// Advanced options for complex scenarios
seqops(sequences)
.concat(['file1.fasta', 'file2.fasta'], {
idConflictResolution: 'suffix',
validateFormats: true,
sourceLabels: ['batch1', 'batch2'],
onProgress: (processed, total, source) =>
console.log(`Processed ${processed} from ${source}`)
})
Extract subsequences
Mirrors `seqkit subseq` functionality for region extraction.
Extraction options
New SeqOps instance for chaining
Sort sequences by specified criteria
High-performance sorting optimized for genomic data compression. Automatically switches between in-memory and external sorting based on dataset size. Proper sequence ordering dramatically improves compression ratios for genomic datasets.
Sort criteria and options
New SeqOps instance for chaining
// Sort by length for compression optimization
seqops(sequences)
.sort({ by: 'length', order: 'desc' })
// Sort by GC content for clustering similar sequences
seqops(sequences)
.sort({ by: 'gc', order: 'asc' })
// Custom sorting for specialized genomic criteria
seqops(sequences)
.sort({
custom: (a, b) => a.sequence.localeCompare(b.sequence)
})
Translate DNA/RNA sequences to proteins
High-performance protein translation supporting all 31 NCBI genetic codes with progressive disclosure for optimal developer experience.
Optional
geneticCode: number | TranslateOptions — Genetic code number (1-33) or full options object
New SeqOps instance for chaining
Split sequences into multiple files
Terminal operation that writes pipeline sequences to separate files with comprehensive seqkit split/split2 compatibility. Integrates seamlessly with all SeqOps pipeline operations for sophisticated genomic workflows.
Split configuration options
Promise resolving to split results summary
// Basic usage - split after processing
const result = await seqops(sequences)
.filter({ minLength: 100 })
.clean({ removeGaps: true })
.split({ mode: 'by-size', sequencesPerFile: 1000 });
// Real-world genomics: Quality control → split for parallel processing
const qcResults = await seqops(rawReads)
.quality({ minScore: 20, trim: true }) // Quality filter
.filter({ minLength: 50, maxLength: 150 }) // Length filter
.clean({ removeAmbiguous: true }) // Clean sequences
.split({ mode: 'by-length', basesPerFile: 1000000 }); // 1 Mb (1,000,000-base) chunks
// Genome assembly: Split chromosomes for parallel analysis
const chrResults = await seqops(genome)
.grep({ pattern: /^chr[1-9]/, target: 'id' }) // Autosomal only
.transform({ upperCase: true }) // Normalize case
.split({ mode: 'by-id', idRegex: 'chr(\\d+)' }); // Group by chromosome
// Amplicon sequencing: Process primers → split by target
const amplicons = await seqops(sequences)
.grep({ pattern: forwardPrimer, target: 'sequence' }) // Has forward primer
.grep({ pattern: reversePrimer, target: 'sequence' }) // Has reverse primer
.subseq({ region: '20:-20' }) // Trim primers
.split({ mode: 'by-parts', numParts: 8 }); // Parallel processing
console.log(`Created ${result.filesCreated.length} files`);
Split sequences with streaming results for advanced processing
Returns AsyncIterable of split results following the locate() pattern. Enables sophisticated post-processing workflows where each split result needs individual handling during the splitting process.
Split configuration options
AsyncIterable of split results for processing
// Basic streaming - process each split file as it's created
for await (const result of seqops(sequences).splitToStream(options)) {
await compressFile(result.outputFile);
console.log(`Split ${result.sequenceCount} sequences to ${result.outputFile}`);
}
// Large genome processing: Split → compress → upload pipeline
for await (const chunk of seqops(largeGenome).splitToStream({
mode: 'by-length',
basesPerFile: 50_000_000 // 50 Mb (50 million bases) per chunk
})) {
// Process each chunk immediately to manage memory
await compressWithBgzip(chunk.outputFile);
await uploadToCloud(chunk.outputFile + '.gz');
await deleteLocalFile(chunk.outputFile); // Clean up
console.log(`Processed chunk ${chunk.partId}: ${chunk.sequenceCount} sequences`);
}
// Quality control: Split → validate → report pipeline
const qualityReports = [];
for await (const batch of seqops(sequencingRun).splitToStream({
mode: 'by-size',
sequencesPerFile: 10000
})) {
const qc = await runQualityControl(batch.outputFile);
qualityReports.push({
file: batch.outputFile,
sequences: batch.sequenceCount,
qcScore: qc.overallScore
});
}
Split by sequence count (convenience method)
Most common splitting mode - divide sequences into files with N sequences each. Ideal for creating manageable chunks for parallel processing.
Number of sequences per output file
Output directory (default: './split')
Promise resolving to split results
// Simple case - just split
await seqops(sequences).splitBySize(1000);
// Common workflow: Filter → process → split for downstream analysis
await seqops(rawSequences)
.filter({ minLength: 100 })
.clean({ removeGaps: true })
.splitBySize(5000, './chunks');
// RNA-seq: Quality filter → deduplicate → split for differential expression
await seqops(rnaseqReads)
.quality({ minScore: 20 })
.rmdup({ by: 'sequence' })
.splitBySize(100000, './de-analysis');
Split into equal parts (convenience method)
Number of output files to create
Output directory (default: './split')
Promise resolving to split results
Split by base count (convenience method)
Implements seqkit split2's key functionality for splitting by total sequence bases rather than sequence count. Essential for genome processing where you need consistent data sizes regardless of sequence count.
Number of bases per output file
Output directory (default: './split')
Promise resolving to split results
// Genome assembly: Split into 10 Mb (10 million base) chunks for parallel processing
await seqops(scaffolds).splitByLength(10_000_000);
// Metagenomics: Process → bin → split by data size
await seqops(contigs)
.filter({ minLength: 1000 })
.sort({ by: 'length', order: 'desc' }) // Longest first
.splitByLength(5_000_000, './metagenome-bins');
// Long-read sequencing: Quality control → split for analysis
await seqops(nanoporeReads)
.quality({ minScore: 7 }) // Nanopore quality threshold
.filter({ minLength: 5000, maxLength: 100000 })
.splitByLength(50_000_000, './nanopore-chunks');
Split by sequence ID pattern (convenience method)
Groups sequences by ID patterns for organized analysis. String patterns are automatically converted to RegExp for better developer experience.
String pattern or RegExp to group sequences by ID
Output directory (default: './split')
Promise resolving to split results
// Genome assembly: Split by chromosome
await seqops(scaffolds).splitById('chr(\\d+)'); // chr1, chr2, chr3...
// Multi-species analysis: Group by organism
await seqops(sequences)
.splitById('(\\w+)_gene'); // Groups: human_gene, mouse_gene, etc.
// Transcriptome: Split by gene families
await seqops(transcripts)
.filter({ minLength: 200 })
.transform({ upperCase: true })
.splitById('(HOX\\w+)_transcript', './gene-families');
// Advanced: Use RegExp for complex patterns
await seqops(sequences)
.splitById(/^(chr[XY]|chrM)_/, './sex-chromosomes');
Split by genomic region with compile-time validation (convenience method)
Uses advanced TypeScript template literal types to parse and validate genomic regions at compile time, preventing coordinate errors.
Promise resolving to split results
// ✅ Type-safe region parsing - validated at compile time
await seqops(sequences).splitByRegion('chr1:1000-2000');
await seqops(sequences).splitByRegion('scaffold_1:500-1500');
await seqops(sequences).splitByRegion('chrX:0-1000'); // 0-based OK
// ❌ These cause TypeScript compilation errors:
// await seqops(sequences).splitByRegion('chr1:2000-1000'); // end < start
// await seqops(sequences).splitByRegion('chr1:1000-1000'); // end = start
// await seqops(sequences).splitByRegion('invalid-format'); // bad format
// 🔥 Compile-time coordinate extraction available:
type Coords = ExtractCoordinates<'chr1:1000-2000'>;
// → { chr: 'chr1'; start: 1000; end: 2000; length: 1000 }
Calculate sequence statistics
Terminal operation that processes all sequences to compute statistics.
Mirrors `seqkit stats` functionality.
Statistics options
Promise resolving to statistics
Write sequences to FASTA file
Terminal operation that writes all sequences in FASTA format.
Output file path
Writer options
Promise resolving when write is complete
Write sequences to FASTQ file
Terminal operation that writes all sequences in FASTQ format. If input sequences don't have quality scores, uses default quality.
Output file path
Default quality string for FASTA sequences
Promise resolving when write is complete
Collect all sequences into an array
Terminal operation that materializes all sequences in memory. Use with caution on large datasets.
Promise resolving to array of sequences
Process each sequence with a callback
Terminal operation that applies a function to each sequence.
Callback function
Promise resolving when processing is complete
Find pattern locations in sequences
Terminal operation that finds all occurrences of patterns within sequences, with support for fuzzy matching, strand searching, and various output formats. Mirrors `seqkit locate` functionality.
// Simple cases (most common)
const locations = seqops(sequences)
.locate('ATCG') // Exact string match
.locate(/ATG...TAA/) // Regex pattern
.locate('ATCG', 2); // Allow 2 mismatches
// Advanced options for complex scenarios
const locations = seqops(sequences).locate({
pattern: 'ATCG',
allowMismatches: 1,
searchBothStrands: true,
outputFormat: 'bed'
});
for await (const location of locations) {
console.log(`Found at ${location.start}-${location.end} on ${location.strand}`);
}
Find pattern locations in sequences
Terminal operation that finds all occurrences of patterns within sequences, with support for fuzzy matching, strand searching, and various output formats. Mirrors `seqkit locate` functionality.
// Simple cases (most common)
const locations = seqops(sequences)
.locate('ATCG') // Exact string match
.locate(/ATG...TAA/) // Regex pattern
.locate('ATCG', 2); // Allow 2 mismatches
// Advanced options for complex scenarios
const locations = seqops(sequences).locate({
pattern: 'ATCG',
allowMismatches: 1,
searchBothStrands: true,
outputFormat: 'bed'
});
for await (const location of locations) {
console.log(`Found at ${location.start}-${location.end} on ${location.strand}`);
}
Find pattern locations in sequences
Terminal operation that finds all occurrences of patterns within sequences, with support for fuzzy matching, strand searching, and various output formats. Mirrors `seqkit locate` functionality.
// Simple cases (most common)
const locations = seqops(sequences)
.locate('ATCG') // Exact string match
.locate(/ATG...TAA/) // Regex pattern
.locate('ATCG', 2); // Allow 2 mismatches
// Advanced options for complex scenarios
const locations = seqops(sequences).locate({
pattern: 'ATCG',
allowMismatches: 1,
searchBothStrands: true,
outputFormat: 'bed'
});
for await (const location of locations) {
console.log(`Found at ${location.start}-${location.end} on ${location.strand}`);
}
Find pattern locations in sequences
Terminal operation that finds all occurrences of patterns within sequences, with support for fuzzy matching, strand searching, and various output formats. Mirrors `seqkit locate` functionality.
// Simple cases (most common)
const locations = seqops(sequences)
.locate('ATCG') // Exact string match
.locate(/ATG...TAA/) // Regex pattern
.locate('ATCG', 2); // Allow 2 mismatches
// Advanced options for complex scenarios
const locations = seqops(sequences).locate({
pattern: 'ATCG',
allowMismatches: 1,
searchBothStrands: true,
outputFormat: 'bed'
});
for await (const location of locations) {
console.log(`Found at ${location.start}-${location.end} on ${location.strand}`);
}
Find pattern locations in sequences
Terminal operation that finds all occurrences of patterns within sequences, with support for fuzzy matching, strand searching, and various output formats. Mirrors `seqkit locate` functionality.
// Simple cases (most common)
const locations = seqops(sequences)
.locate('ATCG') // Exact string match
.locate(/ATG...TAA/) // Regex pattern
.locate('ATCG', 2); // Allow 2 mismatches
// Advanced options for complex scenarios
const locations = seqops(sequences).locate({
pattern: 'ATCG',
allowMismatches: 1,
searchBothStrands: true,
outputFormat: 'bed'
});
for await (const location of locations) {
console.log(`Found at ${location.start}-${location.end} on ${location.strand}`);
}
Enable direct iteration over the pipeline
Async iterator for sequences
Main SeqOps class providing fluent interface for sequence operations
Enables Unix pipeline-style method chaining for processing genomic sequences. All operations are lazy-evaluated and maintain streaming behavior for memory efficiency with large datasets.
Example