#
# @include "_common_stages.mro"
#

#
# Copyright (c) 2017 10X Genomics, Inc. All rights reserved.
#

filetype bam;
filetype bam.bai;
filetype csv;
filetype fastq;
filetype json;
filetype pickle;
filetype fa;
filetype h5;
#
# @include "_cr_lib_stages.mro"
#

#
# Copyright (c) 2020 10X Genomics, Inc. All rights reserved.
#
# WARNING: This file is auto-generated.
# DO NOT MODIFY THIS FILE DIRECTLY
#

filetype ann.bincode.lz4;
filetype arp.bincode;
filetype asf;
filetype bam;
filetype bam.bai;
filetype bcc.bincode;
filetype bcm.bincode;
filetype bed;
filetype bi.bincode;
filetype bin;
filetype bincode;
filetype bincode.lz4;
filetype blf.json;
filetype bmsf;
filetype bsc.bincode;
filetype bsf.bincode;
filetype bui;
filetype csf;
filetype csv;
filetype em.json;
filetype fa;
filetype fasta;
filetype fasta.fai;
filetype fastq;
filetype fbc.bincode;
filetype frf.bincode;
filetype h5;
filetype json;
filetype msh.bincode;
filetype msm.bincode;
filetype pb;
filetype rpc;
filetype shard;
filetype smf.json;
filetype svg;
filetype tbcc.bincode;
filetype tsv;
filetype txt;
filetype umi;
filetype vloupe;
filetype vwc.json;
#
# @include "_run_fbpca_stage.mro"
#

#
# Copyright (c) 2020 10X Genomics, Inc. All rights reserved.
#

filetype h5;
filetype pickle;
#
# @include "_sc_rna_analyzer_stages.mro"
#

#
# Copyright (c) 2019 10X Genomics, Inc. All rights reserved.
#

filetype csv;
filetype h5;
filetype html;
filetype json;
filetype pickle;
filetype binary;
#
# @include "_sc_rna_aggregator_stages.mro"
#

#
# Copyright (c) 2017 10X Genomics, Inc. All rights reserved.
#

filetype bam;
filetype bam.bai;
filetype csv;
filetype tsv;
filetype fastq;
filetype json;
filetype h5;
filetype html;
filetype pickle;
#
# @include "_assign_tags_stages.mro"
#

#
# Copyright (c) 2018 10X Genomics, Inc. All rights reserved.
#

filetype csv;
filetype h5;
filetype json;
filetype pickle;
#
# @include "_assign_tags.mro"
#

filetype csv;
filetype h5;
filetype json;
filetype html;
filetype pickle;
#
# @include "_basic_sc_rna_counter_stages.mro"
#

#
# Copyright (c) 2019 10X Genomics, Inc. All rights reserved.
#

filetype bam;
filetype bam.bai;
filetype csv;
filetype fastq;
filetype json;
filetype h5;
filetype pickle;
filetype bincode;
#
# @include "_common_cloupe_stages.mro"
#

#
# Copyright (c) 2016 10X Genomics, Inc. All rights reserved.
#

filetype cloupe;
filetype csv;
filetype json;
filetype h5;
filetype txt;
#
# @include "_sc_crispr_analyzer_stages.mro"
#

#
# Copyright (c) 2018 10X Genomics, Inc. All rights reserved.
#

filetype csv;
filetype pdf;
filetype h5;
filetype json;
#
# @include "_crispr_analyzer.mro"
#

filetype pdf;
filetype csv;
filetype h5;
filetype json;
#
# @include "_sc_rna_counter_stages.mro"
#

#
# Copyright (c) 2015 10X Genomics, Inc. All rights reserved.
#

filetype csv;
filetype json;
filetype h5;
filetype html;
#
# @include "_sc_vdj_assembler_stages.mro"
#

#
# Copyright (c) 2017 10X Genomics, Inc. All rights reserved.
#

filetype bam;
filetype bam.bai;
filetype sam;
filetype fasta;
filetype fasta.fai;
filetype fastq;
filetype fastq.lz4;
filetype h5;
filetype json;
filetype pickle;
filetype gtf;
filetype csv;
filetype tsv;
filetype html;
filetype lz4;
filetype bin;
filetype txt;
#
# @include "_sc_rna_targeted_analyzer_stages.mro"
#

#
# Copyright (c) 2018 10X Genomics, Inc. All rights reserved.
#

filetype csv;
filetype pdf;
filetype h5;
filetype json;
filetype bam;
filetype bam.bai;
filetype fa;
#
# @include "_targeted_analyzer.mro"
#

filetype csv;
filetype h5;
filetype json;
#
# @include "_vloupe_stages.mro"
#

#
# Copyright (c) 2017 10X Genomics, Inc. All rights reserved.
#

filetype pb;
filetype vloupe;
#
# @include "_sc_multi_defs.mro"
#

filetype bam;
filetype bam.bai;
filetype html;
filetype json;
filetype vloupe;
filetype disable_targeted;
filetype svg;

#
# @include "_common_stages.mro"
#

# Per-sample output bundle: a sample name with its BAM and BAM index, metrics
# summary, per-barcode metrics, molecule info, matrices (h5 and MEX path, plus
# all-genes variants), filtered barcodes, feature reference and target panel.
# Duplicated here to avoid a circular dependency; an identical definition
# appears again under the "_sc_multi_defs.mro" include.
struct SampleSlfeOuts(
    string  sample,
    bam     bam_file,
    bam.bai bam_index_file,
    json    metrics_summary,
    csv     per_barcode_metrics,
    h5      molecule_info,
    h5      matrix_h5,
    path    matrix_mex,
    h5      all_genes_matrix_h5,
    path    all_genes_matrix_mex,
    csv     filtered_barcodes,
    csv     feature_reference,
    csv     target_panel,
)

#
# @include "_cr_lib_stages.mro"
#

# Groups read shard files into three lists: valid, corrected and invalid reads.
struct ReadShards(
    shard[] valid_reads,
    shard[] corrected_reads,
    shard[] invalid_reads,
)

# Describes one barcode read component by read type, kind, offset, length and
# whitelist. ChemistryDef holds a list of these in its `barcode` field.
struct BarcodeReadComponent(
    string read_type,
    string kind,
    int    offset,
    int    length,
    string whitelist,
)

# Describes the UMI read component by read type, offset, length and minimum
# length. Used as the `umi` field of ChemistryDef.
struct UmiReadComponent(
    string read_type,
    int    offset,
    int    length,
    int    min_length,
)

# Describes an RNA read component by read type, offset, length and minimum
# length. ChemistryDef carries two of these (`rna` and `rna2`).
struct RnaReadComponent(
    string read_type,
    int    offset,
    int    length,
    int    min_length,
)

# A chemistry definition: name, description, endedness and strandedness,
# together with its read layout -- a list of barcode components, one UMI
# component and two RNA read components (rna, rna2).
struct ChemistryDef(
    string                 name,
    string                 description,
    string                 endedness,
    string                 strandedness,
    BarcodeReadComponent[] barcode,
    UmiReadComponent       umi,
    RnaReadComponent       rna,
    RnaReadComponent       rna2,
)

# Pairs a sample name with a BAM file and its BAM index.
struct SampleBamFile(
    string  sample,
    bam     bam_file,
    bam.bai bam_index_file,
)

# Metrics for one sample: a JSON summary, a per-barcode metrics CSV and an
# untyped map of per-library-type metrics.
struct SampleMetrics(
    string sample,
    json   summary,
    csv    per_barcode_metrics,
    map    per_lib_type_metrics,
)

# The two FASTA files of a V(D)J reference: regions and donor_regions.
# Used as the `fasta` field of VdjRefFolder.
struct VdjRefFastaFolder(
    fa regions,
    fa donor_regions,
)

# A V(D)J reference: its FASTA files (VdjRefFastaFolder) and a reference JSON.
struct VdjRefFolder(
    VdjRefFastaFolder fasta,
    json              reference,
)

# Chemistry detection outputs: the chemistry type and an antibody-only flag.
struct DetectChemistryStageOutputs(
    string chemistry_type,
    bool   is_antibody_only,
)

# V(D)J chemistry information: chemistry type, chain type and receptor.
# GemWellDetectChemistry holds a list of these.
struct GemWellVdjChemistry(
    string chemistry_type,
    string chain_type,
    string receptor,
)

# Barcode compatibility outputs: a list of library names to translate.
# NOTE(review): what "translate" means is not visible here -- confirm against
# the stage that produces this struct.
struct BarcodeCompatibilityStageOutputs(
    string[] libraries_to_translate,
)

# Bundles chemistry detection results: the count sample definitions, the count
# chemistry outputs, a list of V(D)J chemistries and the barcode compatibility
# outputs.
struct GemWellDetectChemistry(
    map[]                            sample_defs_count,
    DetectChemistryStageOutputs      detect_count_chem,
    GemWellVdjChemistry[]            detect_vdj_chem,
    BarcodeCompatibilityStageOutputs check_barcodes_compatibility,
)

# Files and identifiers collected for gem wells: gem group ids, alignment
# (asf) files, read chunks, barcode/UMI info (bui) files, per-barcode metrics
# shards and annotation files (all lists), plus a target set name, BAM header
# path, feature reference and barcode whitelist.
struct GemWellFiles(
    int[]             gem_groups,
    asf[]             alignments,
    map[]             read_chunks,
    bui[]             bc_umi_info,
    bmsf[]            per_barcode_metrics_shard,
    ann.bincode.lz4[] annotation_files,
    string            target_set_name,
    path              bam_header,
    frf.bincode       slfe_feature_reference,
    string            barcode_whitelist,
)

# Holds a file and a string of bytes.
# NOTE(review): the name suggests only one of the two is expected to be set --
# confirm against the stages that consume this struct.
struct FileOrBytes(
    file   file,
    string bytes,
)

# Sample id, sample description and the multi config SHA. Embedded as
# `common_inputs` in GemWellInputs and GemWellChemistryInputs.
struct CommonInputs(
    string sample_id,
    string sample_desc,
    string multi_config_sha,
)

# Cell calling configuration: recovered and forced cell counts, a cell
# barcodes JSON, an override mode with the library types it applies to, and a
# flag disabling "ab aggregate" detection.
# An identical definition also appears under the
# "_basic_sc_rna_counter_stages.mro" include.
struct CellCalling(
    int      recovered_cells,
    int      force_cells,
    json     cell_barcodes,
    string   override_mode,
    string[] override_library_types,
    bool     disable_ab_aggregate_detection,
)

# Full set of count inputs: sample definitions, chemistry (by name or as a
# custom ChemistryDef), reference path and gene index, primers, cell calling
# configuration, subsampling / read-count limits, read lengths and trimming
# scores, feature reference, aligner and a number of option flags.
# Other structs in this file take subsets of these fields:
# CountChemistryInputs is documented as duck-typing from this struct and
# CountInputsMinimal as the subset not overridden in the CS pipeline.
struct CountInputs(
    map[]        sample_def,
    string       chemistry,
    ChemistryDef custom_chemistry_def,
    path         reference_path,
    json         gene_index,
    map[]        primers,
    CellCalling  cell_calling_config,
    float        subsample_rate,
    int          initial_reads,
    int          primer_initial_reads,
    string[]     special_genomic_regions,
    int          r1_length,
    int          r2_length,
    int          trim_polya_min_score,
    int          trim_tso_min_score,
    bool         no_secondary_analysis,
    bool         no_target_umi_filter,
    file         feature_reference,
    bool         include_introns,
    string       aligner,
    string       probe_barcodes_intended_pairing,
    map          genetic_demux_params,
    string       throughput,
    bool         enforce_library_concordance,
    bool         no_bam,
    json         force_sample_barcodes,
    bool         tenx_cmos,
)

# Full set of V(D)J inputs: sample definitions, chemistry (by name or as a
# custom ChemistryDef), primers, forced cell count, subsampling / read-count
# limits, read lengths, denovo flag, ground-truth clonotype and inner
# enrichment primer paths, chain type and physical library id.
# VdjChemistryInputs is documented as duck-typing from this struct.
struct VdjInputs(
    map[]        sample_def,
    string       chemistry,
    ChemistryDef custom_chemistry_def,
    map[]        primers,
    int          force_cells,
    float        subsample_rate,
    int          initial_reads,
    int          primer_initial_reads,
    string[]     special_genomic_regions,
    bool         denovo,
    int          r1_length,
    int          r2_length,
    path         ground_truth_clonotype_path,
    path         inner_enrichment_primers,
    string       chain_type,
    string       physical_library_id,
)

# Two reference locations: reference_path and vdj_reference_path.
struct VdjGenInputs(
    path reference_path,
    path vdj_reference_path,
)

# Flags that disable parts of the pipeline: count, vdj, multi and multi_count.
# FullPipelineConfig is the variant with separate vdj_t / vdj_b flags.
struct BasicPipelineConfig(
    bool disable_count,
    bool disable_vdj,
    bool disable_multi,
    bool disable_multi_count,
)

# Pairs a sample name with an h5 file and a JSON summary.
struct SampleMoleculeInfo(
    string sample,
    h5     h5_file,
    json   summary,
)

# One library entry for V(D)J aggregation: library id, path to its contig
# info, donor, origin and an untyped `meta` map.
struct VdjAggrCsvLibrary(
    string library_id,
    path   vdj_contig_info,
    string donor,
    string origin,
    map    meta,
)

# V(D)J aggregation input: the list of libraries (VdjAggrCsvLibrary).
struct VdjAggrInput(
    VdjAggrCsvLibrary[] libraries,
)

# V(D)J aggregation result files: clonotypes CSV, donor reference FASTA,
# consensus FASTA, filtered contig and consensus annotation CSVs, web summary
# data JSON and a vloupe file.
struct VdjAggrResults(
    csv    clonotypes,
    fa     donor_ref_fa,
    fasta  consensus_fasta,
    csv    filtered_contig_annotations_csv,
    csv    consensus_annotations_csv,
    json   web_summary_data,
    vloupe vloupe,
)

#
# @include "_assign_tags_stages.mro"
#

# Outputs of tag assignment. Each field lists its type, name, help text and
# explicit output file name; every output file name carries the extension that
# matches the field's type.
# `tag_calls_summary` and `assignment_confidence_table` are csv outputs and are
# named with a ".csv" extension, matching the names used for the same outputs
# in MultiplexingAnalysisCS.
struct AssignTagsOuts(
    json      sample_barcodes_json        "Maps sample names to list of barcodes"                                                                          "sample_barcodes.json",
    csv       tag_calls_per_cell          "Specifies tag assignments per cell"                                                                             "tag_calls_per_cell.csv",
    csv       tag_calls_summary           "Summarizes basic statistics about tag assignments"                                                              "tag_calls_summary.csv",
    csv       assignment_confidence_table "Lists the posterior probabilities for tag assignments provided by JIBES model"                                  "assignment_confidence_table.csv",
    json      cells_per_tag               "Provides a JSON that lists, for each tag, the cells it has been assigned to"                                    "cells_per_tag.json",
    json      non_tag_assignments         "Provides a JSON that lists, for each non-tag assignment (blanks/unassigned), the cells it has been assigned to" "non_tag_assignments.json",
    csv       tag_umi_thresholds_csv      "tag UMI thresholds csv"                                                                                         "tag_umi_thresholds_csv.csv",
    csv       marginal_tag_frequencies    "marginal_tag_frequencies"                                                                                       "marginal_tag_frequencies.csv",
    csv       jibes_model_summary         "jibes_model_summary"                                                                                            "jibes_model_summary.csv",
    json      jibes_parameters            "jibes_parameters"                                                                                               "jibes_parameters.json",
    json      jibes_summary_data          "jibes_summary_data"                                                                                             "jibes_summary_data.json",
    json      tag_call_metrics_json       "tag_call_metrics_json"                                                                                          "tag_call_metrics_json.json",
    json      tag_umi_thresholds_json     "tag_umi_thresholds_json"                                                                                        "tag_umi_thresholds_json.json",
    json      tag_contaminant_info        "tag_contaminant_info"                                                                                           "tag_contaminant_info.json",
    pickle    tag_assigner_pickle         "tag_assigner_pickle"                                                                                            "tag_assigner_pickle.pickle",
    map<json> sample_assignment_metrics   "Per-sample sample assignment summary metrics"                                                                   "sample_assignment_metrics.json",
)

#
# @include "_basic_sc_rna_counter_stages.mro"
#

# Pairs a sample name with its matrices (h5 file and MEX path, in regular and
# all-genes variants) and a filtered barcodes CSV.
struct SampleMatrices(
    string sample,
    h5     matrix_h5,
    path   matrix_mex,
    h5     all_genes_matrix_h5,
    path   all_genes_matrix_mex,
    csv    filtered_barcodes,
)

# Cell calling configuration: recovered and forced cell counts, a cell
# barcodes JSON, an override mode with the library types it applies to, and a
# flag disabling "ab aggregate" detection.
# Identical to the CellCalling definition under the "_cr_lib_stages.mro"
# include; keep the two in sync.
struct CellCalling(
    int      recovered_cells,
    int      force_cells,
    json     cell_barcodes,
    string   override_mode,
    string[] override_library_types,
    bool     disable_ab_aggregate_detection,
)

#
# @include "_sc_multi_defs.mro"
#

###############################################################################
# Pipeline configuration that tells you which of the count and vdj pipelines
# need to be disabled. Any sub-pipeline that has stages from both count and
# vdj will accept this struct as one of its inputs.
# V(D)J is split into separate vdj_t and vdj_b flags, with an additional flag
# for a missing V(D)J reference.
###############################################################################
struct FullPipelineConfig(
    bool disable_count,
    bool disable_vdj_t,
    bool disable_vdj_b,
    bool has_no_vdj_ref,
    bool disable_multi,
    bool disable_multi_count,
)

# CS variant of the V(D)J inputs. Every field here also exists, with the same
# type, in VdjInputs.
struct VdjInputsCS(
    map[]  sample_def,
    bool   denovo,
    int    force_cells,
    path   inner_enrichment_primers,
    string chain_type,
    string physical_library_id,
    int    r1_length,
    int    r2_length,
)

# CS variant of the count inputs. Every field name here also exists in
# CountInputs; note that `feature_reference` is typed `csv` here but `file`
# in CountInputs.
struct CountInputsCS(
    map[]       sample_def,
    path        reference_path,
    json        gene_index,
    bool        no_bam,
    bool        no_secondary_analysis,
    bool        no_target_umi_filter,
    CellCalling cell_calling_config,
    string      chemistry,
    int         r1_length,
    int         r2_length,
    int         trim_polya_min_score,
    int         trim_tso_min_score,
    csv         feature_reference,
    bool        include_introns,
    string      aligner,
    json        force_sample_barcodes,
    bool        tenx_cmos,
)

# Inputs for a gem well: the common inputs, the count inputs and a list of
# V(D)J inputs.
struct GemWellInputs(
    CommonInputs common_inputs,
    CountInputs  count_inputs,
    VdjInputs[]  vdj_inputs,
)

# V(D)J outputs of the CS pipeline. Each field lists its type, name and help
# text; most also give an explicit output file name as a second string.
# Covers the run summary, clonotypes, filtered and all-contig sequences and
# annotations, consensus and concatenated-reference files with their
# alignments and indices, the vloupe file, the AIRR rearrangement TSV and the
# contig info protobuf.
struct VdjOutputsCS(
    csv       metrics_summary_csv             "Run summary CSV"                                  "metrics_summary.csv",
    csv       clonotypes                      "Clonotype info",
    fasta     filtered_contig_fasta           "Filtered contig sequences FASTA"                  "filtered_contig.fasta",
    fastq     filtered_contig_fastq           "Filtered contig sequences FASTQ"                  "filtered_contig.fastq",
    csv       filtered_contig_annotations_csv "Filtered contigs (CSV)"                           "filtered_contig_annotations.csv",
    fasta     all_contig_fasta                "All-contig FASTA"                                 "all_contig.fasta",
    fasta.fai all_contig_fasta_fai            "All-contig FASTA index"                           "all_contig.fasta.fai",
    fastq     all_contig_fastq                "All-contig FASTQ"                                 "all_contig.fastq",
    bam       all_contig_bam                  "Read-contig alignments"                           "all_contig.bam",
    bam.bai   all_contig_bam_bai              "Read-contig alignment index"                      "all_contig.bam.bai",
    json      all_contig_annotations_json     "All contig annotations (JSON)"                    "all_contig_annotations.json",
    bed       all_contig_annotations_bed      "All contig annotations (BED)"                     "all_contig_annotations.bed",
    csv       all_contig_annotations_csv      "All contig annotations (CSV)"                     "all_contig_annotations.csv",
    json      cell_barcodes                   "Barcodes that are declared to be targetted cells",
    fasta     consensus_fasta                 "Clonotype consensus FASTA"                        "consensus.fasta",
    fasta.fai consensus_fasta_fai             "Clonotype consensus FASTA index"                  "consensus.fasta.fai",
    bam       consensus_bam                   "Contig-consensus alignments"                      "consensus.bam",
    bam.bai   consensus_bam_bai               "Contig-consensus alignment index"                 "consensus.bam.bai",
    csv       consensus_annotations_csv       "Clonotype consensus annotations (CSV)"            "consensus_annotations.csv",
    fasta     concat_ref_fasta                "Concatenated reference sequences"                 "concat_ref.fasta",
    fasta.fai concat_ref_fasta_fai            "Concatenated reference index"                     "concat_ref.fasta.fai",
    bam       concat_ref_bam                  "Contig-reference alignments"                      "concat_ref.bam",
    bam.bai   concat_ref_bam_bai              "Contig-reference alignment index"                 "concat_ref.bam.bai",
    vloupe    vloupe                          "Loupe V(D)J Browser file"                         "vloupe.vloupe",
    tsv       airr_rearrangement              "AIRR Rearrangement TSV",
    pb        vdj_contig_info                 "All contig info (ProtoBuf format)",
)

# Count outputs of the CS pipeline. Each field lists its type, name and help
# text; the BAM, BAM index and h5 matrix fields also give an explicit output
# file name.
struct CountOutputsCS(
    csv     metrics_summary               "Run summary CSV",
    bam     possorted_genome_bam          "BAM"                                      "possorted_genome_bam.bam",
    bam.bai possorted_genome_bam_index    "BAM index"                                "possorted_genome_bam.bam.bai",
    path    filtered_feature_bc_matrix    "Filtered feature-barcode matrices MEX",
    h5      filtered_feature_bc_matrix_h5 "Filtered feature-barcode matrices HDF5"   "filtered_feature_bc_matrix.h5",
    path    raw_feature_bc_matrix         "Unfiltered feature-barcode matrices MEX",
    h5      raw_feature_bc_matrix_h5      "Unfiltered feature-barcode matrices HDF5" "raw_feature_bc_matrix.h5",
    path    analysis                      "Secondary analysis output CSV",
    h5      molecule_info                 "Per-molecule read information",
    path    crispr_analysis               "CRISPR-specific analysis",
    path    multiplexing_analysis         "Multiplexing-specific analysis",
    cloupe  cloupe                        "Loupe Browser file",
    csv     feature_reference             "Feature Reference",
    csv     target_panel                  "Target Panel File",
)

# All-contig V(D)J outputs for multi: FASTA and index, FASTQ, read-contig
# alignments and index, and contig annotations as JSON, BED and CSV.
# These fields and output names match the all_contig_* fields of VdjOutputsCS.
struct MultiVdjOutputsCS(
    fasta     all_contig_fasta            "All-contig FASTA"              "all_contig.fasta",
    fasta.fai all_contig_fasta_fai        "All-contig FASTA index"        "all_contig.fasta.fai",
    fastq     all_contig_fastq            "All-contig FASTQ"              "all_contig.fastq",
    bam       all_contig_bam              "Read-contig alignments"        "all_contig.bam",
    bam.bai   all_contig_bam_bai          "Read-contig alignment index"   "all_contig.bam.bai",
    json      all_contig_annotations_json "All contig annotations (JSON)" "all_contig_annotations.json",
    bed       all_contig_annotations_bed  "All contig annotations (BED)"  "all_contig_annotations.bed",
    csv       all_contig_annotations_csv  "All contig annotations (CSV)"  "all_contig_annotations.csv",
)

# Experiment-wide count outputs for multi: feature reference, raw molecule
# info, raw cloupe file, raw feature-barcode matrix (MEX directory and h5) and
# the alignments from unassigned barcodes with their index.
struct MultiCountOutputsCS(
    csv     feature_reference_csv       "Feature reference file"                                              "feature_reference.csv",
    h5      raw_molecule_info_h5        "Molecule info file containing all molecules in the experiment"       "raw_molecule_info.h5",
    cloupe  raw_cloupe                  "Raw Loupe Browser file containing all molecules in the experiment",
    path    raw_feature_bc_matrix_mex   "Contains counts for all features and all barcodes in the experiment" "raw_feature_bc_matrix",
    h5      raw_feature_bc_matrix_h5    "Contains counts for all features and all barcodes in the experiment" "raw_feature_bc_matrix.h5",
    bam     unassigned_alignments       "Alignments from unassigned barcodes"                                 "unassigned_alignments.bam",
    bam.bai unassigned_alignments_index "Index for alignments from unassigned barcodes"                       "unassigned_alignments.bam.bai",
)

# Multiplexing analysis outputs of the CS pipeline: per-cell tag calls, tag
# call summary, assignment confidence table and cells-per-tag JSON.
# The field names are a subset of those in AssignTagsOuts.
struct MultiplexingAnalysisCS(
    csv  tag_calls_per_cell          "Specifies tag assignments per cell"                                            "tag_calls_per_cell.csv",
    csv  tag_calls_summary           "Summarizes basic statistics about tag assignments"                             "tag_calls_summary.csv",
    csv  assignment_confidence_table "Lists the posterior probabilities for tag assignments provided by JIBES model" "assignment_confidence_table.csv",
    json cells_per_tag               "Provides a JSON that lists, for each tag, the cells it has been assigned to"   "cells_per_tag.json",
)

# Top-level multi outputs: multiplexing analysis, count outputs and one set of
# V(D)J outputs each for vdj_b and vdj_t.
struct MultiOutputsCS(
    MultiplexingAnalysisCS multiplexing_analysis,
    MultiCountOutputsCS    count,
    MultiVdjOutputsCS      vdj_b,
    MultiVdjOutputsCS      vdj_t,
)

# Per-sample V(D)J outputs of the CS pipeline: clonotypes, filtered contig
# sequences and annotations, cell barcodes, consensus and
# concatenated-reference files with their alignments and indices, the vloupe
# file, the AIRR rearrangement TSV and the contig info protobuf.
# It has no all_contig_* fields; those are declared in MultiVdjOutputsCS.
struct SampleVdjOutputsCS(
    csv       clonotypes                      "Clonotype info",
    fasta     filtered_contig_fasta           "Filtered contig sequences FASTA"                  "filtered_contig.fasta",
    fastq     filtered_contig_fastq           "Filtered contig sequences FASTQ"                  "filtered_contig.fastq",
    csv       filtered_contig_annotations_csv "Filtered contigs (CSV)"                           "filtered_contig_annotations.csv",
    json      cell_barcodes                   "Barcodes that are declared to be targetted cells",
    fasta     consensus_fasta                 "Clonotype consensus FASTA"                        "consensus.fasta",
    fasta.fai consensus_fasta_fai             "Clonotype consensus FASTA index"                  "consensus.fasta.fai",
    bam       consensus_bam                   "Contig-consensus alignments"                      "consensus.bam",
    bam.bai   consensus_bam_bai               "Contig-consensus alignment index"                 "consensus.bam.bai",
    csv       consensus_annotations_csv       "Clonotype consensus annotations (CSV)"            "consensus_annotations.csv",
    fasta     concat_ref_fasta                "Concatenated reference sequences"                 "concat_ref.fasta",
    fasta.fai concat_ref_fasta_fai            "Concatenated reference index"                     "concat_ref.fasta.fai",
    bam       concat_ref_bam                  "Contig-reference alignments"                      "concat_ref.bam",
    bam.bai   concat_ref_bam_bai              "Contig-reference alignment index"                 "concat_ref.bam.bai",
    vloupe    vloupe                          "Loupe V(D)J Browser file"                         "vloupe.vloupe",
    tsv       airr_rearrangement              "AIRR Rearrangement TSV",
    pb        vdj_contig_info                 "Contig info (ProtoBuf format)",
)

# Per-sample count outputs of the CS pipeline: secondary, antibody and CRISPR
# analysis directories, cloupe file, feature reference, sample barcodes,
# sample feature-barcode matrix (MEX directory and h5), the alignments
# assigned to this sample with their index, molecule info and target panel.
struct SampleCountOutputsCS(
    path    analysis                     "Secondary analysis output CSV",
    path    antibody_analysis            "Antibody analysis outputs",
    cloupe  cloupe                       "Loupe Browser File",
    path    crispr_analysis              "CRISPR analysis outputs",
    csv     feature_reference_csv        "Feature reference"                                               "feature_reference.csv",
    csv     sample_barcodes_csv          "Sample barcodes"                                                 "sample_barcodes.csv",
    path    sample_feature_bc_matrix_mex "Sample feature-barcode matrices MEX"                             "sample_feature_bc_matrix",
    h5      sample_feature_bc_matrix     "Sample feature-barcode matrices H5"                              "sample_feature_bc_matrix.h5",
    bam     sample_alignments            "BAM alignments for reads assigned to this sample"                "sample_alignments.bam",
    bam.bai sample_alignments_index      "BAM index for reads assigned to this sample"                     "sample_alignments.bam.bai",
    h5      sample_molecule_info         "Per-molecule read information for reads assigned to this sample",
    csv     target_panel                 "Target panel file",
)

# All outputs for one sample: count outputs, V(D)J outputs for vdj_b and
# vdj_t, the web summary HTML and the metrics summary CSV.
struct SampleOutputsCS(
    SampleCountOutputsCS count,
    SampleVdjOutputsCS   vdj_b,
    SampleVdjOutputsCS   vdj_t,
    html                 web_summary,
    csv                  metrics_summary,
)

# Per-sample output bundle: a sample name with its BAM and BAM index, metrics
# summary, per-barcode metrics, molecule info, matrices (h5 and MEX path, plus
# all-genes variants), filtered barcodes, feature reference and target panel.
# An identical copy is declared under the "_common_stages.mro" include (there
# to avoid a circular dependency); keep the two in sync.
struct SampleSlfeOuts(
    string  sample,
    bam     bam_file,
    bam.bai bam_index_file,
    json    metrics_summary,
    csv     per_barcode_metrics,
    h5      molecule_info,
    h5      matrix_h5,
    path    matrix_mex,
    h5      all_genes_matrix_h5,
    path    all_genes_matrix_mex,
    csv     filtered_barcodes,
    csv     feature_reference,
    csv     target_panel,
)

###############################################################################
# Chemistry detection inputs
###############################################################################
# ducktypes from CountInputs
# The count fields used for chemistry detection: sample definitions,
# reference path, chemistry (by name or custom definition), read lengths and
# the library concordance flag. Each field has the same name and type as in
# CountInputs.
struct CountChemistryInputs(
    map[]        sample_def,
    path         reference_path,
    string       chemistry,
    int          r1_length,
    int          r2_length,
    bool         enforce_library_concordance,
    ChemistryDef custom_chemistry_def,
)

# ducktypes from VdjInputs
# The V(D)J fields used for chemistry detection: sample definitions, chemistry
# (by name or custom definition), read lengths and chain type. Each field has
# the same name and type as in VdjInputs.
struct VdjChemistryInputs(
    map[]        sample_def,
    string       chemistry,
    int          r1_length,
    int          r2_length,
    string       chain_type,
    ChemistryDef custom_chemistry_def,
)

# ducktypes from GemWellInputsCS, GemWellInputsPD
# Chemistry detection inputs for a gem well: the common inputs, the count
# chemistry inputs and a list of V(D)J chemistry inputs. The field names
# mirror those of GemWellInputs.
struct GemWellChemistryInputs(
    CommonInputs         common_inputs,
    CountChemistryInputs count_inputs,
    VdjChemistryInputs[] vdj_inputs,
)

###############################################################################
# Gem well processor inputs
###############################################################################
# In Count, the basic rna counter is run for each gem well
# This struct carries the counter's inputs. Every field name also exists in
# CountInputs; note that `feature_reference` is typed `csv` here but `file`
# in CountInputs.
struct CounterInputs(
    map[]        sample_def,
    path         reference_path,
    json         gene_index,
    csv          feature_reference,
    ChemistryDef custom_chemistry_def,
    CellCalling  cell_calling_config,
    float        subsample_rate,
    int          initial_reads,
    map[]        primers,
    int          r1_length,
    int          r2_length,
    int          trim_polya_min_score,
    int          trim_tso_min_score,
    bool         include_introns,
    string       aligner,
    bool         no_target_umi_filter,
    bool         enforce_library_concordance,
    bool         no_bam,
    json         force_sample_barcodes,
)

# In VDJ, the contig assembler is run for each gem well
# This struct carries the assembler's inputs. Every field here also exists,
# with the same type, in VdjInputs.
struct VdjAssemblerInputs(
    map[]        sample_def,
    ChemistryDef custom_chemistry_def,
    int          r1_length,
    int          r2_length,
    int          initial_reads,
    float        subsample_rate,
    bool         denovo,
    int          force_cells,
    path         inner_enrichment_primers,
)

###############################################################################
# Outputs from VDJ reporter
###############################################################################
# Contig info protobuf, vloupe file, receptor name, metrics summary (JSON and
# CSV), web summary (HTML and data JSON), contig FASTQ/FASTA files (all and
# filtered) with the FASTA index, annotations BED, and the cell barcode and
# productive cell barcode JSON files.
struct VdjReport(
    pb        vdj_contig_info,
    vloupe    vloupe,
    string    receptor,
    json      metrics_summary_json,
    csv       metrics_summary_csv,
    html      web_summary,
    json      web_summary_data,
    fastq     contig_fastq,
    fastq     filtered_contig_fastq,
    fasta     contig_fasta,
    fasta.fai contig_fasta_fai,
    fasta     filtered_contig_fasta,
    bed       annotations_bed,
    json      cell_barcodes,
    json      productive_cell_barcodes,
)

###############################################################################
# Merge GEM Wells
###############################################################################

# One entry to merge: a library id paired with its molecule h5 file.
struct CountAggrSampleDef(
    string library_id,
    h5     molecule_h5,
)

#
# @include "sc_multi_cs.mro"
#

# The subset of CountInputs which are not overridden in the CS pipeline.
# Reduced form of CountInputs (see the comment above this struct). Note that
# feature_reference is typed `file` here, whereas the stages in this file that
# take a feature reference CSV type it `csv`.
struct CountInputsMinimal(
    map[]       sample_def,
    string      chemistry,
    path        reference_path,
    CellCalling cell_calling_config,
    int         r1_length,
    int         r2_length,
    bool        no_bam,
    bool        no_secondary_analysis,
    bool        no_target_umi_filter,
    file        feature_reference,
    bool        include_introns,
    json        force_sample_barcodes,
    bool        tenx_cmos,
)

#
# @include "_common_stages.mro"
#

# Python stage (code at ../rna/stages/common/cellranger_preflight) that
# declares inputs only and produces no outputs. Reserves 8 GB of memory;
# `volatile = strict` makes its files eligible for Martian's volatile data
# removal (VDR).
# NOTE(review): presumably validates the run parameters and fails the
# pipestance on bad input -- confirm against the stage code.
stage CELLRANGER_PREFLIGHT(
    in  bool  full_check,
    in  map[] sample_def,
    in  path  reference_path,
    in  csv   feature_reference,
    in  int   recovered_cells,
    in  int   force_cells,
    in  int   r1_length,
    in  int   r2_length,
    src py    "../rna/stages/common/cellranger_preflight",
) using (
    mem_gb   = 8,
    volatile = strict,
)

# Python stage (code at ../rna/stages/common/disable_feature_stages) that
# takes the sample definitions, the incoming disable/is_pd flags, the
# per-sample outputs map and the multi graph, and emits six boolean disable_*
# flags. No `using` block, so Martian's default resources apply.
# NOTE(review): the flags are presumably bound to `disabled` modifiers of
# downstream calls -- confirm in the calling pipeline.
stage DISABLE_FEATURE_STAGES(
    in  map[]               sample_def,
    in  bool                disable_multi,
    in  bool                disable_count,
    in  bool                is_pd,
    in  bool                in_disable_targeted,
    in  map<SampleSlfeOuts> sample_outs,
    in  json                multi_graph,
    out bool                disable_crispr,
    out bool                disable_antibody,
    out bool                disable_multiplexing,
    out bool                disable_targeted,
    out bool                disable_legacy_stages,
    out bool                disable_library_cloupe,
    src py                  "../rna/stages/common/disable_feature_stages",
)

# Python stage (code at ../rna/stages/common/parse_target_features). Outputs a
# bait FASTA, target panel and target gene index CSVs, two disable flags, the
# target set name and a JSON summary. `rps_limit` is declared as both an input
# and an output, so the stage re-emits a value under the same name. Reserves
# 4 GB of memory.
stage PARSE_TARGET_FEATURES(
    in  map[]  sample_def,
    in  path   reference_path,
    in  json   gene_index,
    in  bool   no_target_umi_filter,
    in  int    rps_limit,
    out fa     bait_fasta,
    out csv    target_panel,
    out csv    target_gene_indices,
    out bool   disable_targeted,
    out bool   disable_target_umi_filter,
    out int    rps_limit,
    out string target_set_name,
    out json   summary,
    src py     "../rna/stages/common/parse_target_features",
) using (
    mem_gb = 4,
)

#
# @include "_cr_lib_stages.mro"
#

# Compiled stage run as `cr_lib martian align_and_count`, with split/join
# phases. Each chunk receives a `range` map and a read_ann_subsample_rate and
# writes single shard files (csf, bui, asf, bmsf, ...); the stage-level outputs
# are arrays of the same file types, plus a BAM header path and a barcode
# summary CSV. Reserves 4 GB of memory; `volatile = strict` makes its files
# eligible for VDR.
stage ALIGN_AND_COUNT(
    in  int               gem_well,
    in  map[]             read_chunks,
    in  path              reference_path,
    in  ReadShards        read_shards,
    in  fbc.bincode       feature_counts,
    in  frf.bincode       feature_reference,
    in  csv               target_set,
    in  ChemistryDef      chemistry_def,
    in  string            aligner,
    in  float             aligner_subsample_rate,
    in  bool              include_introns,
    in  bool              is_pd,
    in  int               targeted_umi_min_read_count,
    in  int               transcriptome_min_score,
    in  int               trim_polya_min_score,
    in  int               trim_tso_min_score,
    in  tbcc.bincode      total_barcode_counts,
    in  blf.json          barcode_subset,
    out csf[]             counts_bc_order,
    out csf[]             counts_feature_order,
    out bui[]             bc_umi_info,
    out asf[]             pos_sorted,
    out path              bam_header,
    out csv               barcode_summary,
    out ann.bincode.lz4[] annotation_files,
    out bmsf[]            per_barcode_metrics,
    src comp              "cr_lib martian align_and_count",
) split (
    in  map               range,
    in  float             read_ann_subsample_rate,
    out csf               counts_bc_order_shard,
    out csf               counts_feature_order_shard,
    out bui               bc_umi_info_shard,
    out asf               pos_sorted_shard,
    out bsf.bincode       barcode_summary_shard,
    out ann.bincode.lz4[] read_ann_files,
    out bmsf              metrics_shard,
) using (
    mem_gb   = 4,
    volatile = strict,
)

# Compiled stage run as `cr_lib martian write_pos_bam`, with split/join
# phases. Takes a BAM header path and an array of asf alignment files; each
# chunk receives a `range` map and a write_header flag and emits a map of BAM
# chunks. The stage outputs one SampleBamFile plus an array of them.
# `volatile = strict` makes its files eligible for VDR.
# NOTE(review): `no_bam` presumably suppresses BAM output -- confirm in cr_lib.
stage WRITE_POS_BAM(
    in  path            bam_header,
    in  asf[]           alignments,
    in  map[]           read_chunks,
    in  string          target_set_name,
    in  json            sample_barcodes_json,
    in  bool            no_bam,
    out SampleBamFile   pos_sorted_bam,
    out SampleBamFile[] multi_pos_sorted_bam,
    src comp            "cr_lib martian write_pos_bam",
) split (
    in  map             range,
    in  bool            write_header,
    out map             sample_pos_sorted_bam_chunks,
) using (
    volatile = strict,
)

# Compiled stage run as `cr_lib martian barcode_correction`, with split/join
# phases. Consumes the invalid_uncorrected shards together with barcode count
# and metric files; each chunk receives a `range` map and emits one valid
# shard, one invalid shard and a chunk summary. Stage-level outputs are the
# valid_corrected and invalid shard arrays, a JSON summary, and corrected and
# total barcode counts. Reserves 4 GB and 4 threads; eligible for VDR.
stage BARCODE_CORRECTION(
    in  int          gem_well,
    in  shard[]      invalid_uncorrected,
    in  ChemistryDef chemistry_def,
    in  bsc.bincode  barcode_segment_counts,
    in  bcc.bincode  barcode_counts,
    in  bcm.bincode  valid_read_metrics,
    in  string[]     libraries_to_translate,
    out shard[]      valid_corrected,
    out shard[]      invalid,
    out json         summary,
    out bcc.bincode  corrected_barcode_counts,
    out tbcc.bincode total_barcode_counts,
    src comp         "cr_lib martian barcode_correction",
) split (
    in  map          range,
    out shard        valid_shard,
    out shard        invalid_shard,
    out bcm.bincode  chunk_summary,
) using (
    mem_gb   = 4,
    threads  = 4,
    volatile = strict,
)

# Compiled stage run as `cr_lib martian write_barcode_summary`. Takes the gem
# group list, the bui files, the feature reference and the barcode index, and
# writes a single barcode summary HDF5 file. Reserves 8 GB and 1 thread;
# eligible for VDR.
stage WRITE_BARCODE_SUMMARY(
    in  int[]       unique_gem_groups,
    in  bui[]       bc_umi_info,
    in  frf.bincode feature_reference,
    in  bi.bincode  barcode_index,
    out h5          barcode_summary,
    src comp        "cr_lib martian write_barcode_summary",
) using (
    mem_gb   = 8,
    threads  = 1,
    volatile = strict,
)

# Compiled stage run as `cr_lib martian collate_metrics`, with split/join
# phases; each chunk is parameterised by a `sample` string. The name
# `per_barcode_metrics` is used for both an input (bmsf[] shards) and an
# output (a single CSV). Also emits a JSON summary, a per-library-type metrics
# map and an array of SampleMetrics. Reserves 1 GB; eligible for VDR.
stage COLLATE_METRICS(
    in  bmsf[]          per_barcode_metrics,
    in  path            reference_path,
    in  frf.bincode     feature_reference,
    in  json            sample_barcodes_json,
    out json            summary,
    out csv             per_barcode_metrics,
    out map             per_lib_type_metrics,
    out SampleMetrics[] multi_metrics,
    src comp            "cr_lib martian collate_metrics",
) split (
    in  string          sample,
) using (
    mem_gb   = 1,
    volatile = strict,
)

# Compiled stage run as `cr_lib martian write_h5_matrix`. Takes csf count
# files, the feature reference, the chemistry definition, a sample id and the
# barcode index, and writes one HDF5 matrix. Reserves 2 GB and 1 thread;
# eligible for VDR.
stage WRITE_H5_MATRIX(
    in  int          gem_well,
    in  csf[]        counts,
    in  frf.bincode  feature_reference,
    in  ChemistryDef chemistry_def,
    in  string       sample_id,
    in  bi.bincode   barcode_index,
    out h5           matrix,
    src comp         "cr_lib martian write_h5_matrix",
) using (
    mem_gb   = 2,
    threads  = 1,
    volatile = strict,
)

# Compiled stage run as `cr_lib martian write_matrix_market`. Takes csf count
# files, the feature reference and the barcode index; its single output is a
# path (feature_bc_matrix) rather than a typed file. Reserves 2 GB and
# 1 thread; eligible for VDR.
stage WRITE_MATRIX_MARKET(
    in  csf[]       counts,
    in  frf.bincode feature_reference,
    in  bi.bincode  barcode_index,
    out path        feature_bc_matrix,
    src comp        "cr_lib martian write_matrix_market",
) using (
    mem_gb   = 2,
    threads  = 1,
    volatile = strict,
)

# Compiled stage run as `cr_lib martian copy_vdj_reference`. Takes a VDJ
# reference path plus T- and B-cell donor reference FASTA files and outputs a
# VdjRefFolder struct. No `using` block, so default resources apply and the
# stage is not marked volatile.
stage COPY_VDJ_REFERENCE(
    in  path         vdj_reference_path,
    in  fa           vdj_t_donor_ref_fa,
    in  fa           vdj_b_donor_ref_fa,
    out VdjRefFolder vdj_reference,
    src comp         "cr_lib martian copy_vdj_reference",
)

# Compiled stage run as `cr_lib martian detect_chemistry`. Given the sample
# definitions, reference path, a chemistry name spec, a list of allowed
# chemistries and the R1/R2 lengths, it outputs a chemistry_type string and an
# is_antibody_only flag. Reserves 20 GB of memory; eligible for VDR.
stage DETECT_CHEMISTRY(
    in  map[]    sample_def,
    in  path     reference_path,
    in  string   chemistry_name_spec,
    in  string[] allowed_chems,
    in  int      r1_length,
    in  int      r2_length,
    out string   chemistry_type,
    out bool     is_antibody_only,
    src comp     "cr_lib martian detect_chemistry",
) using (
    mem_gb   = 20,
    volatile = strict,
)

# Compiled stage run as `cr_lib martian combine_gem_well_chemistries`. Takes
# an array of GemWellDetectChemistry structs and outputs a library-to-chemistry
# map, the libraries_to_translate list, an is_antibody_only flag and a single
# GemWellDetectChemistry named `legacy`.
# NOTE(review): how `legacy` is chosen from the input array is not visible
# here -- confirm in cr_lib.
stage COMBINE_GEM_WELL_CHEMISTRIES(
    in  GemWellDetectChemistry[] gem_well_detect_chemistry,
    out map                      library_to_chemistry,
    out string[]                 libraries_to_translate,
    out bool                     is_antibody_only,
    out GemWellDetectChemistry   legacy,
    src comp                     "cr_lib martian combine_gem_well_chemistries",
)

# Compiled stage; note the command is `cr_lib martian vdj_gex_bc_compat`,
# which does not mirror the stage name. Takes the chemistry name, sample
# definitions and custom chemistry definition for both the VDJ and GEX sides,
# plus enforce_library_concordance, and outputs a float similarity_score.
stage CHECK_BARCODES_COMPATIBILITY_VDJ(
    in  string       vdj_chemistry_name,
    in  map[]        vdj_sample_def,
    in  string       gex_chemistry_name,
    in  map[]        gex_sample_def,
    in  ChemistryDef gex_custom_chemistry_def,
    in  ChemistryDef vdj_custom_chemistry_def,
    in  bool         enforce_library_concordance,
    out float        similarity_score,
    src comp         "cr_lib martian vdj_gex_bc_compat",
)

# Compiled stage; note the command is `cr_lib martian barcode_compatibility`,
# which does not mirror the stage name. Takes a chemistry name, a custom
# chemistry definition, the sample definitions and
# enforce_library_concordance, and outputs the libraries_to_translate list.
stage CHECK_BARCODES_COMPATIBILITY(
    in  string       chemistry_name,
    in  ChemistryDef custom_chemistry_def,
    in  map[]        sample_def,
    in  bool         enforce_library_concordance,
    out string[]     libraries_to_translate,
    src comp         "cr_lib martian barcode_compatibility",
)

# Compiled stage run as `cr_lib martian detect_vdj_receptor`. Takes a
# force_receptor string, the VDJ reference path, the chemistry name and the
# sample definitions, and outputs a receptor string. Reserves 1 GB of memory;
# eligible for VDR.
# NOTE(review): force_receptor presumably overrides detection when set --
# confirm in cr_lib.
stage DETECT_VDJ_RECEPTOR(
    in  string force_receptor,
    in  path   vdj_reference_path,
    in  string chemistry,
    in  map[]  sample_def,
    out string receptor,
    src comp   "cr_lib martian detect_vdj_receptor",
) using (
    mem_gb   = 1,
    volatile = strict,
)

# Compiled stage run as `cr_lib martian make_shard`, with split/join phases.
# Each chunk receives a chunk_id and a feature_reference (frf.bincode) and
# emits one valid shard, one invalid shard, read-prefix and UMI count files,
# and a chunk summary and histogram. Stage-level outputs include the valid and
# invalid shard arrays, barcode / segment / feature count files, a JSON
# summary, total_read_pairs, paired_end, the feature reference and sequencing
# metrics. `feature_reference` is both a stage output and a chunk input.
# Reserves 4 GB and 4 threads; eligible for VDR.
stage MAKE_SHARD(
    in  ChemistryDef chemistry_def,
    in  int          gem_well,
    in  map[]        read_chunks,
    in  int          r1_length,
    in  int          r2_length,
    in  float        subsample_rate,
    in  int          initial_read_pairs,
    in  path         reference_path,
    in  csv          feature_reference_path,
    in  csv          target_features,
    in  string       target_set_name,
    in  string[]     libraries_to_translate,
    out shard[]      valid,
    out shard[]      invalid,
    out bcc.bincode  barcode_counts,
    out bsc.bincode  barcode_segment_counts,
    out fbc.bincode  feature_counts,
    out json         summary,
    out int          total_read_pairs,
    out bool         paired_end,
    out frf.bincode  feature_reference,
    out bcm.bincode  bc_correct_summary,
    out smf.json     sequencing_metrics,
    src comp         "cr_lib martian make_shard",
) split (
    in  int          chunk_id,
    in  frf.bincode  feature_reference,
    out shard        valid_shard,
    out shard        invalid_shard,
    out rpc          read_prefix_counts,
    out umi          umi_counts,
    out msm.bincode  chunk_summary,
    out msh.bincode  chunk_hist,
) using (
    mem_gb   = 4,
    threads  = 4,
    volatile = strict,
)

# Compiled stage run as `cr_lib martian merge_gem_well_files`. Takes an array
# of GemWellFiles structs and outputs a single merged GemWellFiles struct.
# Eligible for VDR.
stage MERGE_GEM_WELL_FILES(
    in  GemWellFiles[] unmerged_gem_well_files,
    out GemWellFiles   merged_gem_well_files,
    src comp           "cr_lib martian merge_gem_well_files",
) using (
    volatile = strict,
)

# Compiled stage run as `cr_lib martian merge_metrics`. Takes an array of JSON
# summaries and outputs one JSON summary. Eligible for VDR.
# NOTE(review): how conflicting keys are resolved is not visible here --
# confirm in cr_lib.
stage MERGE_METRICS(
    in  json[] summaries,
    out json   summary,
    src comp   "cr_lib martian merge_metrics",
) using (
    volatile = strict,
)

# Compiled stage run as `cr_lib martian multi_preflight`. Declares inputs only
# (a FileOrBytes config and the is_pd flag) and no outputs. Reserves 1 GB and
# 1 thread; eligible for VDR.
# NOTE(review): presumably validates the multi config and fails on error --
# confirm in cr_lib.
stage MULTI_PREFLIGHT(
    in  FileOrBytes config,
    in  bool        is_pd,
    src comp        "cr_lib martian multi_preflight",
) using (
    mem_gb   = 1,
    threads  = 1,
    volatile = strict,
)

# Compiled stage run as `cr_lib martian write_multi_web_summary_json`. Gathers
# per-sample and library metrics, sequencing metrics, the multi config and
# graph, the common and count inputs, several plot payloads and the VDJ T/B
# web-summary contents, and outputs two maps: web_summary_json and
# metrics_summary_csv. The stage is `volatile = strict`, but web_summary_json
# is listed under `retain`, which exempts it from VDR deletion.
stage WRITE_MULTI_WEB_SUMMARY_JSON(
    in  map          per_sample_metrics,
    in  json         library_metrics,
    in  smf.json     sequencing_metrics,
    in  csv          multi_config,
    in  json         multi_graph,
    in  svg          multi_graph_svg,
    in  CommonInputs common_inputs,
    in  CountInputs  count_inputs,
    in  json         tag_contaminant_info,
    in  map          sample_tsne_plots,
    in  map          barcode_rank_plots,
    in  json         jibes_biplot_histogram,
    in  json         targeted_plot,
    in  json         cmo_tsne_plot,
    in  vwc.json     vdj_t_contents,
    in  vwc.json     vdj_b_contents,
    in  string       target_set_name,
    out map          web_summary_json,
    out map          metrics_summary_csv,
    src comp         "cr_lib martian write_multi_web_summary_json",
) using (
    volatile = strict,
) retain (
    web_summary_json,
)

# Compiled stage run as `cr_lib martian build_vdj_ws_contents`. Takes a
# metrics summary, the receptor name, the VdjInputs and VdjGenInputs structs,
# sequencing metrics and a VDJ web-summary JSON, and outputs a vwc.json file.
# WRITE_MULTI_WEB_SUMMARY_JSON declares inputs of that same file type.
# Eligible for VDR.
stage BUILD_VDJ_WS_CONTENTS(
    in  json         metrics_summary,
    in  string       receptor,
    in  VdjInputs    vdj_inputs,
    in  VdjGenInputs vdj_gen_inputs,
    in  smf.json     sequencing_metrics,
    in  json         vdj_ws_json,
    out vwc.json     vdj_ws_contents,
    src comp         "cr_lib martian build_vdj_ws_contents",
) using (
    volatile = strict,
)

# Compiled stage run as `cr_lib martian parse_multi_config`. Takes the sample
# id and description, a FileOrBytes config with its hash, a params map and the
# is_pd flag. Outputs the CommonInputs, CountInputs, VdjInputs[] and
# VdjGenInputs structs, a BasicPipelineConfig, the config file, the multi graph
# and a feature reference CSV. Reserves 6 GB and 1 thread. The stage is
# `volatile = strict`, but feature_ref is listed under `retain`, which exempts
# it from VDR deletion.
stage PARSE_MULTI_CONFIG(
    in  string              sample_id,
    in  string              sample_desc,
    in  FileOrBytes         config,
    in  string              config_hash,
    in  map                 params,
    in  bool                is_pd,
    out CommonInputs        common_input,
    out CountInputs         count_input,
    out VdjInputs[]         vdj_inputs,
    out VdjGenInputs        vdj_gen_inputs,
    out BasicPipelineConfig basic_config,
    out csv                 config_file,
    out json                multi_graph,
    out csv                 feature_ref,
    src comp                "cr_lib martian parse_multi_config",
) using (
    mem_gb   = 6,
    threads  = 1,
    volatile = strict,
) retain (
    feature_ref,
)

# Compiled stage run as `cr_lib martian rust_bridge`, with split/join phases.
# Takes the valid_uncorrected and valid_corrected shard arrays plus raw and
# corrected barcode counts; each chunk receives a `range` map and a list of
# shards and emits one bincode.lz4 read file, a barcodes JSON and an n50s
# bincode file. Stage-level outputs are the read file array, gem groups,
# barcode JSONs, JSON versions of both barcode count files, n50_n50_rpu and
# processed_read_pairs. Reserves 4 GB; unlike most cr_lib stages here it is
# not marked volatile.
stage RUST_BRIDGE(
    in  int           gem_well,
    in  shard[]       valid_uncorrected,
    in  shard[]       valid_corrected,
    in  bcc.bincode   raw_barcode_counts,
    in  bcc.bincode   corrected_barcode_counts,
    in  bool          paired_end,
    out bincode.lz4[] bc_sorted_rna_reads,
    out int[]         gem_groups,
    out json[]        barcodes,
    out json          raw_barcode_counts_json,
    out json          corrected_barcode_counts_json,
    out int           n50_n50_rpu,
    out int           processed_read_pairs,
    src comp          "cr_lib martian rust_bridge",
) split (
    in  map           range,
    in  shard[]       valid_shards,
    out bincode.lz4   chunk_bc_sorted_rna_reads,
    out json          barcodes_shard,
    out bincode       n50s_shard,
) using (
    mem_gb = 4,
)

# Compiled stage run as `cr_lib martian set_aligner_subsample_rate`. Takes a
# barcodes_under_tissue JSON, corrected barcode counts and an integer
# rps_limit, and outputs a float aligner_subsample_rate (ALIGN_AND_COUNT
# declares an input of that name). Reserves 8 GB and 1 thread; eligible for
# VDR.
stage SET_ALIGNER_SUBSAMPLE_RATE(
    in  json        barcodes_under_tissue,
    in  bcc.bincode corrected_barcode_counts,
    in  int         rps_limit,
    out float       aligner_subsample_rate,
    src comp        "cr_lib martian set_aligner_subsample_rate",
) using (
    mem_gb   = 8,
    threads  = 1,
    volatile = strict,
)

# Compiled stage run as `cr_lib martian set_targeted_umi_filter`. Takes the
# bui files and the feature reference, and outputs an integer
# umi_read_count_threshold plus a JSON summary. Reserves 8 GB and 1 thread;
# eligible for VDR.
stage SET_TARGETED_UMI_FILTER(
    in  bui[]       bc_umi_info,
    in  frf.bincode feature_reference,
    out int         umi_read_count_threshold,
    out json        summary,
    src comp        "cr_lib martian set_targeted_umi_filter",
) using (
    mem_gb   = 8,
    threads  = 1,
    volatile = strict,
)

# Compiled stage; note the command is `cr_lib martian setup_chunks`, without
# the MULTI_ prefix of the stage name. Takes a sample id, the sample
# definitions, a chemistry name, a custom chemistry definition and a default
# library type, and outputs a list of chunk maps, a ChemistryDef and a
# barcode_whitelist string.
stage MULTI_SETUP_CHUNKS(
    in  string       sample_id,
    in  map[]        sample_def,
    in  string       chemistry_name,
    in  ChemistryDef custom_chemistry_def,
    in  string       default_library_type,
    out map[]        chunks,
    out ChemistryDef chemistry_def,
    out string       barcode_whitelist,
    src comp         "cr_lib martian setup_chunks",
)

# Compiled stage run as `cr_lib martian subsample_barcodes`. Takes corrected
# barcode counts and outputs a blf.json barcode_subset (ALIGN_AND_COUNT
# declares an input of that name and type). Reserves 4 GB and 1 thread;
# eligible for VDR.
stage SUBSAMPLE_BARCODES(
    in  bcc.bincode corrected_barcode_counts,
    out blf.json    barcode_subset,
    src comp        "cr_lib martian subsample_barcodes",
) using (
    mem_gb   = 4,
    threads  = 1,
    volatile = strict,
)

# Compiled stage run as `cr_lib martian write_barcode_index`. Takes barcode
# counts and a barcodes_under_tissue JSON, and outputs a bi.bincode barcode
# index (a type several writer stages in this file accept as input). Reserves
# 4 GB and 1 thread; eligible for VDR.
stage WRITE_BARCODE_INDEX(
    in  bcc.bincode barcode_counts,
    in  json        barcodes_under_tissue,
    out bi.bincode  barcode_index,
    src comp        "cr_lib martian write_barcode_index",
) using (
    mem_gb   = 4,
    threads  = 1,
    volatile = strict,
)

# Compiled stage run as `cr_lib martian write_gene_index`. Takes the reference
# path and outputs a gene_index JSON (PARSE_TARGET_FEATURES declares an input
# of that name). Reserves 6 GB and 1 thread; eligible for VDR.
stage WRITE_GENE_INDEX(
    in  path reference_path,
    out json gene_index,
    src comp "cr_lib martian write_gene_index",
) using (
    mem_gb   = 6,
    threads  = 1,
    volatile = strict,
)

# Compiled stage run as `cr_lib martian write_molecule_info`. Note that the
# `counts_bc_order` input is typed bui[] here, whereas ALIGN_AND_COUNT's output
# of the same name is csf[] and its bui[] output is `bc_umi_info`. Outputs one
# SampleMoleculeInfo plus an array of them. Reserves 16 GB and 1 thread;
# eligible for VDR.
# NOTE(review): confirm which upstream output is actually bound to
# counts_bc_order in the calling pipeline.
stage WRITE_MOLECULE_INFO(
    in  int                  gem_well,
    in  bui[]                counts_bc_order,
    in  path                 reference_path,
    in  map[]                read_chunks,
    in  frf.bincode          feature_reference,
    in  csv                  filtered_barcodes,
    in  json                 target_panel_summary,
    in  string               target_set_name,
    in  json                 matrix_computer_summary,
    in  int                  recovered_cells,
    in  int                  force_cells,
    in  bool                 include_introns,
    in  string               multi_config_sha,
    in  json                 sample_barcodes_json,
    in  SampleMetrics[]      per_sample_metrics,
    in  bi.bincode           barcode_index,
    out SampleMoleculeInfo   single_mol_info,
    out SampleMoleculeInfo[] multi_mol_info,
    src comp                 "cr_lib martian write_molecule_info",
) using (
    mem_gb   = 16,
    threads  = 1,
    volatile = strict,
)

# Compiled stage; note the command is `cr_lib martian assigner`, which does
# not mirror the stage name. Takes the VDJ reference path, contig annotations
# and the receptor name; outputs a JSON summary, the enclone protobuf, a donor
# reference FASTA and a disable_vloupe flag. Reserves 5 GB of memory.
# NOTE(review): `threads = -4` is negative on purpose or by mistake? Per the
# Martian docs a negative value requests at least its absolute value and lets
# the stage use more when available -- confirm this is intended.
stage RUN_ENCLONE(
    in  path   vdj_reference_path,
    in  json   contig_annotations,
    in  string receptor,
    out json   summary,
    out pb     enclone_output,
    out fa     donor_ref_fa,
    out bool   disable_vloupe,
    src comp   "cr_lib martian assigner",
) using (
    mem_gb  = 5,
    threads = -4,
)

# Compiled stage run as `cr_lib martian write_clonotype_outs`. Takes the
# receptor name and the enclone protobuf, and outputs a clonotypes CSV.
# Reserves 8 GB of memory.
stage WRITE_CLONOTYPE_OUTS(
    in  string receptor,
    in  pb     enclone_output,
    out csv    clonotypes_csv,
    src comp   "cr_lib martian write_clonotype_outs",
) using (
    mem_gb = 8,
)

# Compiled stage run as `cr_lib martian fill_clonotype_info`. Takes contig
# annotations and the enclone protobuf, and outputs
# all_contig_annotations_json.
# NOTE(review): presumably adds clonotype fields to the annotations -- confirm
# in cr_lib.
stage FILL_CLONOTYPE_INFO(
    in  json contig_annotations,
    in  pb   enclone_output,
    out json all_contig_annotations_json,
    src comp "cr_lib martian fill_clonotype_info",
)

# Compiled stage; note the command is `cr_lib martian handle_no_ref`, shorter
# than the stage name. Takes two contig annotation JSONs (asm_ and clonotype_)
# and a has_no_vdj_ref flag, and outputs one final_contig_annotations JSON.
# NOTE(review): presumably the flag selects which input becomes the output --
# confirm in cr_lib.
stage HANDLE_NO_VDJ_REF(
    in  json asm_contig_json,
    in  json clonotype_contig_json,
    in  bool has_no_vdj_ref,
    out json final_contig_annotations,
    src comp "cr_lib martian handle_no_ref",
)

# Compiled stage run as `cr_lib martian write_concat_ref_outs`. Takes the
# enclone protobuf and all_contig_annotations_json; outputs a BAM with its
# index and a FASTA with its index, all prefixed concat_ref_. Reserves 4 GB
# and 4 threads.
stage WRITE_CONCAT_REF_OUTS(
    in  pb        enclone_output,
    in  json      all_contig_annotations_json,
    out bam       concat_ref_bam,
    out bam.bai   concat_ref_bam_bai,
    out fasta     concat_ref_fasta,
    out fasta.fai concat_ref_fasta_fai,
    src comp      "cr_lib martian write_concat_ref_outs",
) using (
    mem_gb  = 4,
    threads = 4,
)

# Compiled stage run as `cr_lib martian write_consensus_bam`. Takes the
# enclone protobuf and all_contig_annotations_json; outputs a consensus BAM
# and its index. Reserves 4 GB and 4 threads.
stage WRITE_CONSENSUS_BAM(
    in  pb      enclone_output,
    in  json    all_contig_annotations_json,
    out bam     consensus_bam,
    out bam.bai consensus_bam_bai,
    src comp    "cr_lib martian write_consensus_bam",
) using (
    mem_gb  = 4,
    threads = 4,
)

# Compiled stage run as `cr_lib martian write_consensus_txt`. Takes only the
# enclone protobuf; outputs a consensus FASTA with its index and a consensus
# annotations CSV. Reserves 4 GB and 4 threads.
stage WRITE_CONSENSUS_TXT(
    in  pb        enclone_output,
    out fasta     consensus_fasta,
    out fasta.fai consensus_fasta_fai,
    out csv       consensus_annotations_csv,
    src comp      "cr_lib martian write_consensus_txt",
) using (
    mem_gb  = 4,
    threads = 4,
)

# Compiled stage; note the command is `cr_lib martian assembly`. It has
# split/join phases: each chunk receives one bincode.lz4 read file, a
# perf_track flag and a chunk_id, and emits a barcodes JSON, three barcode_data
# binary files and an outs_builder bincode file. Stage-level outputs include
# the contig BAM and index, summary TSVs, a metrics summary, contig
# annotations, barcode support, per-chunk barcode lists, assemblable reads per
# barcode, alignment info, an unmapped sample FASTQ and a report. The input
# types match RUST_BRIDGE's bc_sorted_rna_reads, paired_end and n50_n50_rpu
# outputs. No `using` block, so resources are not declared in the MRO.
stage ASSEMBLE_VDJ(
    in  bincode.lz4[] bc_sorted_rna_reads,
    in  bool          paired_end,
    in  path          vdj_reference_path,
    in  string        receptor,
    in  int           n50_n50_rpu,
    in  int           npairs,
    in  bool          denovo,
    in  int           force_cells,
    in  path          inner_enrichment_primers,
    in  int           total_read_pairs,
    in  json          corrected_bc_counts,
    out bam           contig_bam,
    out bam.bai       contig_bam_bai,
    out tsv           summary_tsv,
    out tsv           umi_summary_tsv,
    out json          metrics_summary_json,
    out json          contig_annotations,
    out csv           barcode_support,
    out json[]        barcodes_in_chunks,
    out arp.bincode   assemblable_reads_per_bc,
    out txt           align_info,
    out fastq         unmapped_sample_fastq,
    out txt           report,
    src comp          "cr_lib martian assembly",
) split (
    in  bincode.lz4   chunk_rna_reads,
    in  bool          perf_track,
    in  int           chunk_id,
    out json          barcodes_in_chunk,
    out bin           barcode_data,
    out bin           barcode_data_sum,
    out bin           barcode_data_brief,
    out bincode       outs_builder,
)

# Compiled stage; note the command is `cr_lib martian airr_filter`, which does
# not mirror the stage name. Takes contig annotations and the concat_ref FASTA,
# and outputs an airr_annotations TSV.
stage CREATE_AIRR_TSV(
    in  json  contig_annotations,
    in  fasta concat_ref_fasta,
    out tsv   airr_annotations,
    src comp  "cr_lib martian airr_filter",
)

# Compiled stage run as `cr_lib martian write_contig_outs`. Takes contig
# annotations, total_read_pairs, corrected barcode counts and the
# assemblable-reads-per-barcode file. Outputs contig FASTQ/FASTA files (all
# and filtered, with a FASTA index), an annotations BED, cell_barcodes and
# paired_cell_barcodes JSONs, and a JSON summary.
stage WRITE_CONTIG_OUTS(
    in  json        contig_annotations,
    in  int         total_read_pairs,
    in  json        corrected_bc_counts,
    in  arp.bincode assemblable_reads_per_bc,
    out fastq       contig_fastq,
    out fastq       filtered_contig_fastq,
    out fasta       contig_fasta,
    out fasta.fai   contig_fasta_fai,
    out fasta       filtered_contig_fasta,
    out bed         annotations_bed,
    out json        cell_barcodes,
    out json        paired_cell_barcodes,
    out json        summary,
    src comp        "cr_lib martian write_contig_outs",
)

# Compiled stage run as `cr_lib martian handle_gex_cells`. Takes the assembler
# contig annotations, a filtered_barcodes CSV and two flags (is_antibody_only,
# is_non_targeted_gex), and outputs a contig_annotations JSON.
# NOTE(review): presumably reconciles VDJ cell calls with the GEX barcode
# list -- confirm in cr_lib.
stage HANDLE_GEX_CELLS(
    in  json asm_contig_annotations,
    in  csv  filtered_barcodes,
    in  bool is_antibody_only,
    in  bool is_non_targeted_gex,
    out json contig_annotations,
    src comp "cr_lib martian handle_gex_cells",
)

# Compiled stage run as `cr_lib martian write_ann_csv`. Takes
# all_contig_annotations_json and outputs two CSVs: all contig annotations and
# filtered contig annotations.
stage WRITE_ANN_CSV(
    in  json all_contig_annotations_json,
    out csv  all_contig_annotations_csv,
    out csv  filtered_contig_annotations_csv,
    src comp "cr_lib martian write_ann_csv",
)

# Compiled stage run as `cr_lib martian process_vdj_proto`. Takes an array of
# VdjAggrCsvLibrary structs and a count_gem_well_map, and outputs a receptor
# string and a gem_well_map. SETUP_VDJ_AGGR declares inputs with those two
# output names.
stage PROCESS_VDJ_PROTO(
    in  VdjAggrCsvLibrary[] libraries,
    in  map                 count_gem_well_map,
    out string              receptor,
    out map                 gem_well_map,
    src comp                "cr_lib martian process_vdj_proto",
)

# Compiled stage run as `cr_lib martian setup_vdj_aggr`, with split/join
# phases. Each chunk receives a chunk_id and emits one annotation JSON plus two
# maps (enclone_meta_row, enclone_gem_well_info). Stage-level outputs are the
# annotation JSON array, an enclone input CSV, the gem-well metadata
# (em.json), a VDJ reference path and a combined annotation JSON.
stage SETUP_VDJ_AGGR(
    in  VdjAggrCsvLibrary[] libraries,
    in  map                 gem_well_map,
    in  string              receptor,
    out json[]              contig_ann_json_files,
    out csv                 enclone_input_csv,
    out em.json             enclone_gem_well_meta,
    out path                vdj_reference_path,
    out json                combined_ann_json,
    src comp                "cr_lib martian setup_vdj_aggr",
) split (
    in  int                 chunk_id,
    out json                chunk_ann_json,
    out map                 enclone_meta_row,
    out map                 enclone_gem_well_info,
)

# Compiled stage run as `cr_lib martian run_enclone_aggr`. Its four inputs
# have the same names and types as four of SETUP_VDJ_AGGR's outputs. Outputs
# the enclone protobuf and a donor reference FASTA. Reserves 9 GB and
# 4 threads.
stage RUN_ENCLONE_AGGR(
    in  json[]  contig_ann_json_files,
    in  csv     enclone_input_csv,
    in  em.json enclone_gem_well_meta,
    in  path    vdj_reference_path,
    out pb      enclone_output,
    out fa      donor_ref_fa,
    src comp    "cr_lib martian run_enclone_aggr",
) using (
    mem_gb  = 9,
    threads = 4,
)

# Compiled stage run as `cr_lib martian parse_aggr_csv`. `aggregation_csv` is
# declared as both an input and an output, so the stage re-emits a CSV under
# the same name. Also outputs the count library maps, an array of VdjAggrInput
# structs and two flags (disable_count_aggr, disable_vdj_aggr).
stage PARSE_AGGR_CSV(
    in  path           pipestance_root,
    in  csv            aggregation_csv,
    out csv            aggregation_csv,
    out map[]          count_libraries,
    out VdjAggrInput[] vdj_aggr_inputs,
    out bool           disable_count_aggr,
    out bool           disable_vdj_aggr,
    src comp           "cr_lib martian parse_aggr_csv",
)

# Compiled stage run as `cr_lib martian write_contig_proto`. Takes the VDJ
# reference path, contig annotations, a metrics summary, the receptor name, the
# gem well list, a cell_barcodes JSON, the sample id and description and the
# multi config SHA. Its single output is the vdj_contig_info protobuf, which
# is also a field of the VdjReport struct.
stage WRITE_CONTIG_PROTO(
    in  path   vdj_reference_path,
    in  json   contig_annotations_json,
    in  json   metrics_summary_json,
    in  string receptor,
    in  int[]  gem_wells,
    in  json   cell_barcodes,
    in  string sample_id,
    in  string sample_desc,
    in  string multi_config_sha,
    out pb     vdj_contig_info,
    src comp   "cr_lib martian write_contig_proto",
)

# Compiled stage; note the command is `cr_lib martian match_vdj_outs`, without
# the AGGR part of the stage name. Every input is an array (receptors,
# clonotypes, donor references, consensus FASTAs, reference paths, annotation
# CSVs, web summary data, vloupes). Outputs two VdjAggrResults structs, one for
# T and one for B, plus a single vdj_reference_path.
# NOTE(review): the arrays are presumably parallel and indexed per receptor
# run -- confirm in cr_lib.
stage MATCH_VDJ_AGGR_OUTS(
    in  string[]       receptors,
    in  csv[]          clonotypes,
    in  fa[]           donor_ref_fas,
    in  fasta[]        consensus_fastas,
    in  path[]         vdj_reference_paths,
    in  csv[]          filtered_contig_annotations_csvs,
    in  csv[]          consensus_annotations_csvs,
    in  json[]         web_summary_data,
    in  vloupe[]       vloupes,
    out VdjAggrResults vdj_t_results,
    out VdjAggrResults vdj_b_results,
    out path           vdj_reference_path,
    src comp           "cr_lib martian match_vdj_outs",
)

# Compiled stage run as `cr_lib martian write_aggr_ann`. Takes the enclone
# gem-well metadata (em.json) and an annotation CSV, and outputs an
# augmented_annotation_csv.
stage WRITE_AGGR_ANN(
    in  em.json enclone_gem_well_meta,
    in  csv     annotation_csv,
    out csv     augmented_annotation_csv,
    src comp    "cr_lib martian write_aggr_ann",
)

# Compiled stage; note the command is `cr_lib martian write_ws_json`, an
# abbreviation of the stage name. Takes the VDJ reference path, the
# VdjAggrCsvLibrary array, the enclone protobuf and gem-well metadata, the
# sample id and description, a clonotypes CSV and the receptor name. Outputs
# two JSON files: web_summary_content and per_origin_hist.
stage WRITE_WEB_SUMMARY_JSON(
    in  path                vdj_reference_path,
    in  VdjAggrCsvLibrary[] libraries,
    in  pb                  enclone_output,
    in  em.json             enclone_gem_well_meta,
    in  string              sample_id,
    in  string              sample_desc,
    in  csv                 clonotypes_csv,
    in  string              receptor,
    out json                web_summary_content,
    out json                per_origin_hist,
    src comp                "cr_lib martian write_ws_json",
)

#
# @include "_run_fbpca_stage.mro"
#

# Python stage (code at ../rna/stages/analyzer/run_fbpca). Takes a matrix
# HDF5, the library info maps, the number of PCs, a skip flag and
# is_antibody_only, and outputs two pickle files: dimred_matrix and
# matrix_barcode_feature_info. CORRECT_CHEMISTRY_BATCH declares inputs with
# those two names. The empty `split ( )` declares split/join phases without
# any chunk-level parameters in the MRO.
stage RUN_FBPCA(
    in  h5     matrix_h5,
    in  map[]  library_info,
    in  int    num_pcs,
    in  bool   skip,
    in  bool   is_antibody_only,
    out pickle dimred_matrix,
    out pickle matrix_barcode_feature_info,
    src py     "../rna/stages/analyzer/run_fbpca",
) split (
)

#
# @include "_sc_rna_analyzer_stages.mro"
#

# Python stage (code at ../rna/stages/analyzer/analyzer_preflight). Takes the
# filtered matrix, the gene/barcode selection CSVs and the PCA, batch
# correction (cbc_*), clustering, t-SNE and UMAP parameters. `skip` is declared
# as both an input and an output; the stage also outputs is_antibody_only.
# Eligible for VDR.
# NOTE(review): presumably validates the parameters and may force `skip` on --
# confirm against the stage code.
stage ANALYZER_PREFLIGHT(
    in  bool   skip,
    in  h5     filtered_matrices_h5,
    in  csv    use_genes,
    in  csv    exclude_genes,
    in  csv    use_bcs,
    in  int    num_analysis_bcs,
    in  int    force_cells,
    in  int    random_seed,
    in  int    num_pca_bcs,
    in  int    num_pca_genes,
    in  int    num_principal_comps,
    in  int    cbc_knn,
    in  float  cbc_alpha,
    in  float  cbc_sigma,
    in  bool   cbc_realign_panorama,
    in  int    max_clusters,
    in  int    graphclust_neighbors,
    in  float  neighbor_a,
    in  float  neighbor_b,
    in  int    tsne_perplexity,
    in  int    tsne_input_pcs,
    in  int    tsne_max_dims,
    in  int    tsne_max_iter,
    in  int    tsne_stop_lying_iter,
    in  int    tsne_mom_switch_iter,
    in  float  tsne_theta,
    in  int    umap_n_neighbors,
    in  int    umap_input_pcs,
    in  int    umap_max_dims,
    in  float  umap_min_dist,
    in  string umap_metric,
    out bool   skip,
    out bool   is_antibody_only,
    src py     "../rna/stages/analyzer/analyzer_preflight",
) using (
    volatile = strict,
)

# Python stage (code at ../rna/stages/analyzer/reanalyzer_preflight) with a
# single input, the filtered matrix HDF5, and no outputs. Eligible for VDR.
# NOTE(review): presumably validates the matrix before reanalysis -- confirm
# against the stage code.
stage REANALYZER_PREFLIGHT(
    in  h5 filtered_matrices_h5,
    src py "../rna/stages/analyzer/reanalyzer_preflight",
) using (
    volatile = strict,
)

# Python stage (code at ../rna/stages/analyzer/reanalyze_verify_sample_ids).
# Takes a matrix HDF5 and a list of sample definition maps; `sample_defs` is
# declared as both an input and an output, so the stage re-emits a list under
# the same name.
stage REANALYZE_VERIFY_SAMPLE_IDS(
    in  h5    matrix_h5,
    in  map[] sample_defs,
    out map[] sample_defs,
    src py    "../rna/stages/analyzer/reanalyze_verify_sample_ids",
)

# Python stage (code at ../rna/stages/analyzer/preprocess_matrix). Takes a
# matrix HDF5, a skip flag, a random seed, the gene/barcode selection CSVs,
# num_bcs, force_cells and is_antibody_only. Outputs two matrices
# (cloupe_matrix_h5, preprocessed_matrix_h5) and an is_multi_genome flag. The
# empty `split ( )` declares split/join phases without chunk-level parameters
# in the MRO. Eligible for VDR.
stage PREPROCESS_MATRIX(
    in  h5   matrix_h5,
    in  bool skip,
    in  int  random_seed,
    in  csv  use_genes,
    in  csv  exclude_genes,
    in  csv  use_bcs,
    in  int  num_bcs,
    in  int  force_cells,
    in  bool is_antibody_only,
    out h5   cloupe_matrix_h5,
    out h5   preprocessed_matrix_h5,
    out bool is_multi_genome,
    src py   "../rna/stages/analyzer/preprocess_matrix",
) split (
) using (
    volatile = strict,
)

# Python stage (code at ../rna/stages/analyzer/run_multigenome_analysis).
# Takes the filtered matrix plus is_multi_genome and skip flags. Outputs two
# paths (multi_genome_csv, multi_genome_json) and a JSON summary;
# SUMMARIZE_ANALYSIS declares inputs with the same three names. Empty
# `split ( )`; eligible for VDR.
stage RUN_MULTIGENOME_ANALYSIS(
    in  h5   filtered_matrices_h5,
    in  bool is_multi_genome,
    in  bool skip,
    out path multi_genome_csv,
    out path multi_genome_json,
    out json multi_genome_summary,
    src py   "../rna/stages/analyzer/run_multigenome_analysis",
) split (
) using (
    volatile = strict,
)

# Python stage (code at ../rna/stages/analyzer/run_pca). Takes a matrix HDF5,
# a skip flag, a random seed, num_bcs, num_genes, num_pcs and
# is_antibody_only. Outputs a PCA HDF5 file and a pca_csv path. Empty
# `split ( )`; eligible for VDR.
stage RUN_PCA(
    in  h5   matrix_h5,
    in  bool skip,
    in  int  random_seed,
    in  int  num_bcs,
    in  int  num_genes,
    in  int  num_pcs,
    in  bool is_antibody_only,
    out h5   pca_h5,
    out path pca_csv,
    src py   "../rna/stages/analyzer/run_pca",
) split (
) using (
    volatile = strict,
)

# Python stage (code at ../rna/stages/analyzer/run_kmeans), with split/join
# phases; each chunk is parameterised by an integer n_clusters. Takes the
# matrix and PCA HDF5 files, a skip flag, a random seed, max_clusters, num_bcs
# and num_pcs. Outputs a k-means HDF5 file and a kmeans_csv path. Eligible for
# VDR.
# NOTE(review): chunks presumably cover cluster counts up to max_clusters --
# confirm against the stage's split function.
stage RUN_KMEANS(
    in  h5   matrix_h5,
    in  h5   pca_h5,
    in  bool skip,
    in  int  random_seed,
    in  int  max_clusters,
    in  int  num_bcs,
    in  int  num_pcs,
    out h5   kmeans_h5,
    out path kmeans_csv,
    src py   "../rna/stages/analyzer/run_kmeans",
) split (
    in  int  n_clusters,
) using (
    volatile = strict,
)

# Python stage (code at ../rna/stages/analyzer/run_graph_clustering), with
# split/join phases. It is the only stage in this section whose inputs carry
# inline help strings; those strings are part of the declaration. Each chunk
# receives a pickled neighbor index, a submatrix HDF5, a row range (row_start,
# total_rows), k_nearest and a use_bcs HDF5. Outputs a chunked_neighbors HDF5,
# a clusters HDF5 and a clusters_csv path. Eligible for VDR.
stage RUN_GRAPH_CLUSTERING(
    in  h5     matrix_h5,
    in  h5     pca_h5,
    in  int    num_neighbors       "Use this many neighbors",
    in  float  neighbor_a          "Use larger of (a+b*log10(n_cells) neighbors or num_neighbors",
    in  float  neighbor_b          "Use larger of (a+b*log10(n_cells) neighbors or num_neighbors",
    in  int    num_bcs             "Use this many cell-barcodes in clustering",
    in  int    input_pcs           "Use top N PCs",
    in  int    balltree_leaf_size,
    in  string similarity_type     "Type of similarity to use (nn or snn)",
    in  bool   skip,
    out h5     chunked_neighbors,
    out h5     clusters_h5,
    out path   clusters_csv,
    src py     "../rna/stages/analyzer/run_graph_clustering",
) split (
    in  pickle neighbor_index,
    in  h5     submatrix,
    in  int    row_start,
    in  int    total_rows,
    in  int    k_nearest,
    in  h5     use_bcs,
) using (
    volatile = strict,
)

# Python stage (code at ../rna/stages/analyzer/merge_clusters). Takes the
# matrix, PCA and clusters HDF5 files and a skip flag. `clusters_h5` is
# declared as both an input and an output; the stage also outputs a
# clusters_csv path. Empty `split ( )`; eligible for VDR.
stage MERGE_CLUSTERS(
    in  h5   matrix_h5,
    in  h5   pca_h5,
    in  h5   clusters_h5,
    in  bool skip,
    out h5   clusters_h5,
    out path clusters_csv,
    src py   "../rna/stages/analyzer/merge_clusters",
) split (
) using (
    volatile = strict,
)

# Python stage (code at ../rna/stages/analyzer/combine_clustering). Takes a
# skip flag plus the k-means and graph-clustering results (an HDF5 file and a
# CSV path for each), and outputs one clustering HDF5 file and one
# clustering_csv path. Eligible for VDR.
stage COMBINE_CLUSTERING(
    in  bool skip,
    in  h5   kmeans_h5,
    in  path kmeans_csv,
    in  h5   graphclust_h5,
    in  path graphclust_csv,
    out h5   clustering_h5,
    out path clustering_csv,
    src py   "../rna/stages/analyzer/combine_clustering",
) using (
    volatile = strict,
)

# Python stage (code at ../rna/stages/analyzer/run_differential_expression),
# with split/join phases; each chunk is parameterised by a clustering_key
# string. Takes the matrix and clustering HDF5 files, a skip flag, a random
# seed, max_clusters and is_antibody_only. Outputs a diffexp HDF5 file and a
# diffexp_csv path. Eligible for VDR.
stage RUN_DIFFERENTIAL_EXPRESSION(
    in  h5     matrix_h5,
    in  h5     clustering_h5,
    in  bool   skip,
    in  int    random_seed,
    in  int    max_clusters,
    in  bool   is_antibody_only,
    out h5     diffexp_h5,
    out path   diffexp_csv,
    src py     "../rna/stages/analyzer/run_differential_expression",
) split (
    in  string clustering_key,
) using (
    volatile = strict,
)

# Python stage (code at ../rna/stages/analyzer/run_tsne), with split/join
# phases; each chunk is parameterised by an integer tsne_dims and a
# feature_type string. Takes the matrix and PCA HDF5 files, a skip flag, a
# random seed, the t-SNE parameters (perplexity, input_pcs, max_dims,
# max_iter, stop_lying_iter, mom_switch_iter, theta) and is_antibody_only.
# Outputs a t-SNE HDF5 file and a tsne_csv path. Eligible for VDR.
stage RUN_TSNE(
    in  h5     matrix_h5,
    in  h5     pca_h5,
    in  bool   skip,
    in  int    random_seed,
    in  int    perplexity,
    in  int    input_pcs,
    in  int    max_dims,
    in  int    max_iter,
    in  int    stop_lying_iter,
    in  int    mom_switch_iter,
    in  float  theta,
    in  bool   is_antibody_only,
    out h5     tsne_h5,
    out path   tsne_csv,
    src py     "../rna/stages/analyzer/run_tsne",
) split (
    in  int    tsne_dims,
    in  string feature_type,
) using (
    volatile = strict,
)

# Python stage (code at ../rna/stages/analyzer/run_umap), with split/join
# phases; each chunk is parameterised by an integer umap_dims and a
# feature_type string, mirroring RUN_TSNE's chunk parameters. Takes the matrix
# and PCA HDF5 files, a skip flag, a random seed, the UMAP parameters
# (n_neighbors, input_pcs, max_dims, min_dist, metric) and is_antibody_only.
# Outputs a UMAP HDF5 file and a umap_csv path. Eligible for VDR.
stage RUN_UMAP(
    in  h5     matrix_h5,
    in  h5     pca_h5,
    in  bool   skip,
    in  int    random_seed,
    in  int    n_neighbors,
    in  int    input_pcs,
    in  int    max_dims,
    in  float  min_dist,
    in  string metric,
    in  bool   is_antibody_only,
    out h5     umap_h5,
    out path   umap_csv,
    src py     "../rna/stages/analyzer/run_umap",
) split (
    in  int    umap_dims,
    in  string feature_type,
) using (
    volatile = strict,
)

# Python stage (code at ../rna/stages/analyzer/summarize_analysis). Takes the
# HDF5 files and CSV paths for PCA, clustering, diffexp, t-SNE and UMAP, the
# multi-genome results, the chemistry_batch_correction flag with the
# before/after batch scores, and a skip flag. Outputs two paths (analysis,
# analysis_csv) and a JSON summary. Empty `split ( )`; eligible for VDR.
stage SUMMARIZE_ANALYSIS(
    in  h5    matrix_h5,
    in  h5    pca_h5,
    in  h5    clustering_h5,
    in  h5    diffexp_h5,
    in  h5    tsne_h5,
    in  h5    umap_h5,
    in  path  pca_csv,
    in  path  clustering_csv,
    in  path  diffexp_csv,
    in  path  tsne_csv,
    in  path  umap_csv,
    in  json  multi_genome_summary,
    in  path  multi_genome_csv,
    in  path  multi_genome_json,
    in  bool  is_multi_genome,
    in  bool  chemistry_batch_correction,
    in  float batch_score_before_correction,
    in  float batch_score_after_correction,
    in  bool  skip,
    out path  analysis,
    out path  analysis_csv,
    out json  summary,
    src py    "../rna/stages/analyzer/summarize_analysis",
) split (
) using (
    volatile = strict,
)

# Python stage; note the code path is ../rna/stages/analyzer/parse_csv, which
# is shorter than the stage name. `params_csv` is declared as both an input
# and an output. The remaining outputs are individual analysis parameters
# whose names and types match the corresponding ANALYZER_PREFLIGHT inputs
# (PCA, cbc_*, clustering, t-SNE and UMAP settings). Eligible for VDR.
# NOTE(review): the values emitted for parameters absent from the CSV are not
# visible here -- confirm against the stage code.
stage PARSE_PARAM_CSV(
    in  csv    params_csv,
    out csv    params_csv,
    out int    num_analysis_bcs,
    out int    random_seed,
    out int    num_pca_bcs,
    out int    num_pca_genes,
    out int    num_principal_comps,
    out int    cbc_knn,
    out float  cbc_alpha,
    out float  cbc_sigma,
    out bool   cbc_realign_panorama,
    out int    max_clusters,
    out int    graphclust_neighbors,
    out float  neighbor_a,
    out float  neighbor_b,
    out int    tsne_perplexity,
    out int    tsne_input_pcs,
    out int    tsne_max_dims,
    out int    tsne_max_iter,
    out int    tsne_stop_lying_iter,
    out int    tsne_mom_switch_iter,
    out float  tsne_theta,
    out int    umap_n_neighbors,
    out int    umap_input_pcs,
    out int    umap_max_dims,
    out float  umap_min_dist,
    out string umap_metric,
    src py     "../rna/stages/analyzer/parse_csv",
) using (
    volatile = strict,
)

# Python stage (code at ../rna/stages/analyzer/summarize_reanalysis). Takes
# the sample id and description, the filtered matrix, the analysis path and an
# analyze-matrices summary JSON. Outputs an HTML web summary, a JSON summary
# and a feature_bc_matrix_mex path. Empty `split ( )`. The stage is
# `volatile = strict`, but `summary` is listed under `retain`, which exempts it
# from VDR deletion.
stage SUMMARIZE_REANALYSIS(
    in  string sample_id,
    in  string sample_desc,
    in  h5     filtered_matrices,
    in  path   analysis,
    in  json   analyze_matrices_summary,
    out html   web_summary,
    out json   summary,
    out path   feature_bc_matrix_mex,
    src py     "../rna/stages/analyzer/summarize_reanalysis",
) split (
) using (
    volatile = strict,
) retain (
    summary,
)

# Python stage (code at ../rna/stages/analyzer/correct_chemistry_batch), with
# split/join phases. Its two pickle inputs have the same names as RUN_FBPCA's
# outputs. Each chunk receives a batch_id, a batch-to-barcode-indices map,
# three pickles (ordered_dimred_matrix, idx_to_batch_id,
# barcode_reorder_index) and a need_reorder_barcode flag, and emits one binary
# batch_nearest_neighbor file. Stage-level outputs are the before/after batch
# scores and an aligned PCA (HDF5 file and CSV path). Reserves 4 GB; not
# marked volatile.
stage CORRECT_CHEMISTRY_BATCH(
    in  pickle dimred_matrix,
    in  pickle matrix_barcode_feature_info,
    in  map[]  library_info,
    in  int    cbc_knn,
    in  float  cbc_alpha,
    in  float  cbc_sigma,
    in  bool   cbc_realign_panorama,
    in  bool   skip,
    out float  batch_score_before_correction,
    out float  batch_score_after_correction,
    out h5     aligned_pca_h5,
    out path   aligned_pca_csv,
    src py     "../rna/stages/analyzer/correct_chemistry_batch",
) split (
    in  int    batch_id,
    in  map    batch_to_bc_indices,
    in  pickle ordered_dimred_matrix,
    in  pickle idx_to_batch_id,
    in  bool   need_reorder_barcode,
    in  pickle barcode_reorder_index,
    out binary batch_nearest_neighbor,
) using (
    mem_gb = 4,
)

# Stage CHOOSE_DIMENSION_REDUCTION
#
# Turns the single `chemistry_batch_correction` flag into two disable flags.
# In SC_RNA_ANALYZER, `disable_run_pca` gates RUN_PCA and
# `disable_correct_chemistry_batch` gates both RUN_FBPCA and
# CORRECT_CHEMISTRY_BATCH.
# Presumably the two flags are mutually exclusive so that exactly one
# dimension-reduction path runs -- verify in the Python implementation.
stage CHOOSE_DIMENSION_REDUCTION(
    in  bool chemistry_batch_correction,
    out bool disable_run_pca,
    out bool disable_correct_chemistry_batch,
    src py   "../rna/stages/analyzer/choose_dimension_reduction",
)

# Stage CHOOSE_DIMENSION_REDUCTION_OUTPUT
#
# Reduces lists of candidate PCA results (h5 files and CSV directories) to a
# single `pca_h5` / `pca_csv` pair. SC_RNA_ANALYZER passes the outputs of
# RUN_PCA and CORRECT_CHEMISTRY_BATCH, in that order, in each list.
# Presumably the entry from whichever upstream stage was not disabled is
# selected -- TODO confirm in the Python implementation.
stage CHOOSE_DIMENSION_REDUCTION_OUTPUT(
    in  bool   skip,
    in  h5[]   pca_h5_list,
    in  path[] pca_csv_list,
    out h5     pca_h5,
    out path   pca_csv,
    src py     "../rna/stages/analyzer/choose_dimension_reduction_output",
)

#
# @include "sc_rna_analyzer.mro"
#

# Pipeline SC_RNA_ANALYZER
#
# Secondary-analysis pipeline over a filtered feature-barcode matrix.
# Call graph, as wired below:
#
#   ANALYZER_PREFLIGHT -> PREPROCESS_MATRIX
#     -> RUN_MULTIGENOME_ANALYSIS            (optional)
#     -> RUN_PCA  or  RUN_FBPCA + CORRECT_CHEMISTRY_BATCH
#     -> CHOOSE_DIMENSION_REDUCTION_OUTPUT
#     -> RUN_KMEANS, RUN_GRAPH_CLUSTERING -> MERGE_CLUSTERS
#     -> COMBINE_CLUSTERING -> RUN_DIFFERENTIAL_EXPRESSION
#     -> RUN_TSNE, RUN_UMAP
#     -> SUMMARIZE_ANALYSIS
#
# Inputs:
#   filtered_matrices_h5       - matrix to analyze
#   aggr_library_info          - library info handed to the batch-correction path
#   no_secondary_analysis      - forwarded to ANALYZER_PREFLIGHT as `skip`
#   use_genes/exclude_genes/use_bcs, num_analysis_bcs, force_cells
#                              - matrix preprocessing options
#   chemistry_batch_correction - selects the dimension-reduction path
#   cbc_*, num_pca_*, max_clusters, graphclust_*, neighbor_*, tsne_*, umap_*
#                              - per-step tuning parameters
#   skip_multigenome_analysis  - disables RUN_MULTIGENOME_ANALYSIS
#
# Outputs:
#   analysis, analysis_csv, summary - from SUMMARIZE_ANALYSIS
#   cloupe_matrix_h5                - from PREPROCESS_MATRIX
pipeline SC_RNA_ANALYZER(
    in  h5     filtered_matrices_h5,
    in  map[]  aggr_library_info,
    in  bool   no_secondary_analysis,
    in  csv    use_genes,
    in  csv    exclude_genes,
    in  csv    use_bcs,
    in  int    num_analysis_bcs,
    in  int    random_seed,
    in  int    num_pca_bcs,
    in  int    num_pca_genes,
    in  int    num_principal_comps,
    in  bool   chemistry_batch_correction,
    in  int    cbc_knn,
    in  float  cbc_alpha,
    in  float  cbc_sigma,
    in  bool   cbc_realign_panorama,
    in  int    max_clusters,
    in  int    graphclust_neighbors,
    in  float  neighbor_a,
    in  float  neighbor_b,
    in  int    tsne_perplexity,
    in  int    tsne_input_pcs,
    in  int    tsne_max_dims,
    in  int    tsne_max_iter,
    in  int    tsne_stop_lying_iter,
    in  int    tsne_mom_switch_iter,
    in  float  tsne_theta,
    in  int    umap_n_neighbors,
    in  int    umap_input_pcs,
    in  int    umap_max_dims,
    in  float  umap_min_dist,
    in  string umap_metric,
    in  int    force_cells,
    in  bool   skip_multigenome_analysis,
    out path   analysis,
    out path   analysis_csv,
    out h5     cloupe_matrix_h5,
    out json   summary,
)
{
    # Receives every tuning parameter plus the caller's skip request. Its
    # `skip` and `is_antibody_only` outputs are what all later calls consume
    # (rather than self.no_secondary_analysis directly).
    call ANALYZER_PREFLIGHT(
        skip                 = self.no_secondary_analysis,
        filtered_matrices_h5 = self.filtered_matrices_h5,
        use_genes            = self.use_genes,
        exclude_genes        = self.exclude_genes,
        use_bcs              = self.use_bcs,
        num_analysis_bcs     = self.num_analysis_bcs,
        force_cells          = self.force_cells,
        random_seed          = self.random_seed,
        num_pca_bcs          = self.num_pca_bcs,
        num_pca_genes        = self.num_pca_genes,
        num_principal_comps  = self.num_principal_comps,
        cbc_knn              = self.cbc_knn,
        cbc_alpha            = self.cbc_alpha,
        cbc_sigma            = self.cbc_sigma,
        cbc_realign_panorama = self.cbc_realign_panorama,
        max_clusters         = self.max_clusters,
        graphclust_neighbors = self.graphclust_neighbors,
        neighbor_a           = self.neighbor_a,
        neighbor_b           = self.neighbor_b,
        tsne_perplexity      = self.tsne_perplexity,
        tsne_input_pcs       = self.tsne_input_pcs,
        tsne_max_dims        = self.tsne_max_dims,
        tsne_max_iter        = self.tsne_max_iter,
        tsne_stop_lying_iter = self.tsne_stop_lying_iter,
        tsne_mom_switch_iter = self.tsne_mom_switch_iter,
        tsne_theta           = self.tsne_theta,
        umap_n_neighbors     = self.umap_n_neighbors,
        umap_input_pcs       = self.umap_input_pcs,
        umap_max_dims        = self.umap_max_dims,
        umap_min_dist        = self.umap_min_dist,
        umap_metric          = self.umap_metric,
    ) using (
        volatile = true,
    )

    # Produces `preprocessed_matrix_h5` (input to every analysis step below),
    # `is_multi_genome`, and `cloupe_matrix_h5` (returned by this pipeline).
    call PREPROCESS_MATRIX(
        matrix_h5        = self.filtered_matrices_h5,
        random_seed      = self.random_seed,
        use_genes        = self.use_genes,
        exclude_genes    = self.exclude_genes,
        use_bcs          = self.use_bcs,
        num_bcs          = self.num_analysis_bcs,
        force_cells      = self.force_cells,
        is_antibody_only = ANALYZER_PREFLIGHT.is_antibody_only,
        skip             = ANALYZER_PREFLIGHT.skip,
    ) using (
        volatile = true,
    )

    # Runs on the original filtered matrix, not the preprocessed one. The
    # whole call is disabled when the caller sets skip_multigenome_analysis.
    call RUN_MULTIGENOME_ANALYSIS(
        filtered_matrices_h5 = self.filtered_matrices_h5,
        is_multi_genome      = PREPROCESS_MATRIX.is_multi_genome,
        skip                 = ANALYZER_PREFLIGHT.skip,
    ) using (
        disabled = self.skip_multigenome_analysis,
        volatile = true,
    )

    # Derives the `disabled` flags for the two dimension-reduction paths
    # below. Marked `local = true`.
    call CHOOSE_DIMENSION_REDUCTION(
        chemistry_batch_correction = self.chemistry_batch_correction,
    ) using (
        local = true,
    )

    # Path 1: plain PCA. Disabled via disable_run_pca.
    call RUN_PCA(
        matrix_h5        = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        random_seed      = self.random_seed,
        num_bcs          = self.num_pca_bcs,
        num_genes        = self.num_pca_genes,
        num_pcs          = self.num_principal_comps,
        is_antibody_only = ANALYZER_PREFLIGHT.is_antibody_only,
        skip             = ANALYZER_PREFLIGHT.skip,
    ) using (
        disabled = CHOOSE_DIMENSION_REDUCTION.disable_run_pca,
        volatile = true,
    )

    # Path 2, step 1: RUN_FBPCA feeds CORRECT_CHEMISTRY_BATCH. Both calls
    # share the disable_correct_chemistry_batch flag.
    call RUN_FBPCA(
        matrix_h5        = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        library_info     = self.aggr_library_info,
        num_pcs          = self.num_principal_comps,
        skip             = ANALYZER_PREFLIGHT.skip,
        is_antibody_only = ANALYZER_PREFLIGHT.is_antibody_only,
    ) using (
        disabled = CHOOSE_DIMENSION_REDUCTION.disable_correct_chemistry_batch,
        volatile = true,
    )

    # Path 2, step 2: batch correction on the RUN_FBPCA outputs.
    call CORRECT_CHEMISTRY_BATCH(
        dimred_matrix               = RUN_FBPCA.dimred_matrix,
        matrix_barcode_feature_info = RUN_FBPCA.matrix_barcode_feature_info,
        library_info                = self.aggr_library_info,
        cbc_knn                     = self.cbc_knn,
        cbc_alpha                   = self.cbc_alpha,
        cbc_sigma                   = self.cbc_sigma,
        cbc_realign_panorama        = self.cbc_realign_panorama,
        skip                        = ANALYZER_PREFLIGHT.skip,
    ) using (
        disabled = CHOOSE_DIMENSION_REDUCTION.disable_correct_chemistry_batch,
        volatile = true,
    )

    # Collapses the two candidate PCA results into the single pca_h5/pca_csv
    # used by everything downstream. List order is [RUN_PCA, batch-corrected].
    call CHOOSE_DIMENSION_REDUCTION_OUTPUT(
        skip         = ANALYZER_PREFLIGHT.skip,
        pca_h5_list  = [
            RUN_PCA.pca_h5,
            CORRECT_CHEMISTRY_BATCH.aligned_pca_h5,
        ],
        pca_csv_list = [
            RUN_PCA.pca_csv,
            CORRECT_CHEMISTRY_BATCH.aligned_pca_csv,
        ],
    )

    # num_bcs / num_pcs are left null here.
    call RUN_KMEANS(
        matrix_h5    = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        pca_h5       = CHOOSE_DIMENSION_REDUCTION_OUTPUT.pca_h5,
        random_seed  = self.random_seed,
        max_clusters = self.max_clusters,
        skip         = ANALYZER_PREFLIGHT.skip,
        num_bcs      = null,
        num_pcs      = null,
    ) using (
        volatile = true,
    )

    # similarity_type is fixed to "nn"; input_pcs, num_bcs and
    # balltree_leaf_size are left null.
    call RUN_GRAPH_CLUSTERING(
        matrix_h5          = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        pca_h5             = CHOOSE_DIMENSION_REDUCTION_OUTPUT.pca_h5,
        num_neighbors      = self.graphclust_neighbors,
        neighbor_a         = self.neighbor_a,
        neighbor_b         = self.neighbor_b,
        input_pcs          = null,
        num_bcs            = null,
        similarity_type    = "nn",
        balltree_leaf_size = null,
        skip               = ANALYZER_PREFLIGHT.skip,
    ) using (
        volatile = true,
    )

    # Post-processes the graph-clustering result; its outputs (not the raw
    # RUN_GRAPH_CLUSTERING ones) are what COMBINE_CLUSTERING receives.
    call MERGE_CLUSTERS(
        matrix_h5   = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        pca_h5      = CHOOSE_DIMENSION_REDUCTION_OUTPUT.pca_h5,
        clusters_h5 = RUN_GRAPH_CLUSTERING.clusters_h5,
        skip        = ANALYZER_PREFLIGHT.skip,
    ) using (
        volatile = true,
    )

    # Joins k-means and (merged) graph-based clusterings into one
    # clustering_h5 / clustering_csv pair.
    call COMBINE_CLUSTERING(
        kmeans_h5      = RUN_KMEANS.kmeans_h5,
        kmeans_csv     = RUN_KMEANS.kmeans_csv,
        graphclust_h5  = MERGE_CLUSTERS.clusters_h5,
        graphclust_csv = MERGE_CLUSTERS.clusters_csv,
        skip           = ANALYZER_PREFLIGHT.skip,
    ) using (
        volatile = true,
    )

    # Differential expression over the combined clustering.
    call RUN_DIFFERENTIAL_EXPRESSION(
        matrix_h5        = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        clustering_h5    = COMBINE_CLUSTERING.clustering_h5,
        random_seed      = self.random_seed,
        max_clusters     = self.max_clusters,
        is_antibody_only = ANALYZER_PREFLIGHT.is_antibody_only,
        skip             = ANALYZER_PREFLIGHT.skip,
    ) using (
        volatile = true,
    )

    # t-SNE projection from the selected PCA result.
    call RUN_TSNE(
        matrix_h5        = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        pca_h5           = CHOOSE_DIMENSION_REDUCTION_OUTPUT.pca_h5,
        random_seed      = self.random_seed,
        perplexity       = self.tsne_perplexity,
        input_pcs        = self.tsne_input_pcs,
        max_dims         = self.tsne_max_dims,
        max_iter         = self.tsne_max_iter,
        stop_lying_iter  = self.tsne_stop_lying_iter,
        mom_switch_iter  = self.tsne_mom_switch_iter,
        theta            = self.tsne_theta,
        is_antibody_only = ANALYZER_PREFLIGHT.is_antibody_only,
        skip             = ANALYZER_PREFLIGHT.skip,
    ) using (
        volatile = true,
    )

    # UMAP projection from the selected PCA result.
    call RUN_UMAP(
        matrix_h5        = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        pca_h5           = CHOOSE_DIMENSION_REDUCTION_OUTPUT.pca_h5,
        random_seed      = self.random_seed,
        n_neighbors      = self.umap_n_neighbors,
        input_pcs        = self.umap_input_pcs,
        max_dims         = self.umap_max_dims,
        min_dist         = self.umap_min_dist,
        metric           = self.umap_metric,
        is_antibody_only = ANALYZER_PREFLIGHT.is_antibody_only,
        skip             = ANALYZER_PREFLIGHT.skip,
    ) using (
        volatile = true,
    )

    # Gathers every h5/CSV result above plus the multi-genome outputs and the
    # batch-correction scores. This call carries no `volatile` modifier,
    # unlike most of the calls above.
    call SUMMARIZE_ANALYSIS(
        matrix_h5                     = PREPROCESS_MATRIX.preprocessed_matrix_h5,
        pca_h5                        = CHOOSE_DIMENSION_REDUCTION_OUTPUT.pca_h5,
        clustering_h5                 = COMBINE_CLUSTERING.clustering_h5,
        diffexp_h5                    = RUN_DIFFERENTIAL_EXPRESSION.diffexp_h5,
        tsne_h5                       = RUN_TSNE.tsne_h5,
        umap_h5                       = RUN_UMAP.umap_h5,
        pca_csv                       = CHOOSE_DIMENSION_REDUCTION_OUTPUT.pca_csv,
        clustering_csv                = COMBINE_CLUSTERING.clustering_csv,
        diffexp_csv                   = RUN_DIFFERENTIAL_EXPRESSION.diffexp_csv,
        tsne_csv                      = RUN_TSNE.tsne_csv,
        umap_csv                      = RUN_UMAP.umap_csv,
        multi_genome_summary          = RUN_MULTIGENOME_ANALYSIS.multi_genome_summary,
        multi_genome_csv              = RUN_MULTIGENOME_ANALYSIS.multi_genome_csv,
        multi_genome_json             = RUN_MULTIGENOME_ANALYSIS.multi_genome_json,
        is_multi_genome               = PREPROCESS_MATRIX.is_multi_genome,
        chemistry_batch_correction    = self.chemistry_batch_correction,
        batch_score_before_correction = CORRECT_CHEMISTRY_BATCH.batch_score_before_correction,
        batch_score_after_correction  = CORRECT_CHEMISTRY_BATCH.batch_score_after_correction,
        skip                          = ANALYZER_PREFLIGHT.skip,
    )

    return (
        analysis         = SUMMARIZE_ANALYSIS.analysis,
        analysis_csv     = SUMMARIZE_ANALYSIS.analysis_csv,
        cloupe_matrix_h5 = PREPROCESS_MATRIX.cloupe_matrix_h5,
        summary          = SUMMARIZE_ANALYSIS.summary,
    )
}

#
# @include "_sc_rna_aggregator_stages.mro"
#

# Stage AGGREGATOR_PREFLIGHT
#
# Takes the sample definitions and the normalization mode and declares no
# outputs. SC_RNA_AGGREGATOR invokes it with `preflight = true`.
# Presumably it validates its inputs and fails the run on bad values --
# confirm in the Python implementation.
stage AGGREGATOR_PREFLIGHT(
    in  map[]  sample_defs,
    in  string normalization_mode,
    src py     "../rna/stages/aggregator/aggregator_preflight",
)

# Stage PARSE_CSV
#
# Takes an aggregation CSV together with a pipestance root path, a
# `reanalyze` flag, a matrix h5 and the product type. Re-emits the
# aggregation CSV and produces the `sample_defs` list of maps.
# NOTE(review): how `pipestance_root`, `reanalyze` and `matrix_h5` influence
# parsing is not visible from this declaration.
stage PARSE_CSV(
    in  path   pipestance_root,
    in  csv    aggregation_csv,
    in  bool   reanalyze,
    in  h5     matrix_h5,
    in  string product_type,
    out csv    aggregation_csv,
    out map[]  sample_defs,
    src py     "../rna/stages/aggregator/parse_csv",
)

# Stage CHECK_MOLECULE_INFO_VERSION
#
# Maps a list of sample definitions to `updated_sample_defs`.
# Split stage: each chunk receives one `sample_def` map and a
# `mol_h5_version` integer and emits one `updated_sample_def`.
# Presumably chunks are per sample and may rewrite older molecule-info
# files -- TODO confirm in the Python implementation.
stage CHECK_MOLECULE_INFO_VERSION(
    in  map[]  sample_defs,
    in  string product_type,
    out map[]  updated_sample_defs,
    src py     "../rna/stages/aggregator/check_molecule_info_version",
) split (
    in  int    mol_h5_version,
    in  map    sample_def,
    out map    updated_sample_def,
)

# Stage SETUP_SAMPLES
#
# Derives aggregation bookkeeping from the sample definitions:
#   gem_group_index / gem_group_index_json - the same-named index as a map
#                                            and as a JSON file
#   libraries                              - list of library maps
#   chemistry_batch_correction             - flag that SC_RNA_AGGREGATOR
#                                            forwards to SC_RNA_ANALYZER
stage SETUP_SAMPLES(
    in  map[] sample_defs,
    out map   gem_group_index,
    out map[] libraries,
    out json  gem_group_index_json,
    out bool  chemistry_batch_correction,
    src py    "../rna/stages/aggregator/setup_samples",
)

# Stage MERGE_MOLECULES
#
# Combines the per-sample inputs described by `sample_defs` / `libraries`
# into a single `merged_molecules` h5 and reports `gem_group_barcode_ranges`.
#
# Split stage: each chunk gets an `aggr_id`, one `molecule_h5`, a barcode
# index offset/end pair and the merged barcode / feature-reference files; it
# emits `new_gem_groups` and a chunk-level `molecule_h5`.
# Note that `molecule_h5` is both a chunk input and a chunk output.
# Requests mem_gb = 16.
stage MERGE_MOLECULES(
    in  map[]  sample_defs,
    in  map[]  libraries,
    out h5     merged_molecules,
    out map    gem_group_barcode_ranges,
    src py     "../rna/stages/aggregator/merge_molecules",
) split (
    in  string aggr_id,
    in  h5     molecule_h5,
    in  int    barcode_idx_offset,
    in  int    barcode_idx_end,
    in  h5     merged_barcodes,
    in  h5     merged_feature_reference,
    out int[]  new_gem_groups,
    out h5     molecule_h5,
) using (
    mem_gb = 16,
)

# Stage NORMALIZE_DEPTH
#
# Consumes the merged molecules h5, the gem-group index and barcode ranges,
# a normalization mode string and a `targeted_depth_factor`. Produces lists
# of raw and filtered matrices (h5) with their `*_nnz` counts, and a summary
# JSON.
#
# Split stage: chunks are addressed by `chunk_start` / `chunk_len` and also
# receive `frac_reads_kept` and `num_cells` arrays; each chunk emits one raw
# matrix, one filtered matrix and a chunk summary.
# NOTE(review): what `chunk_start` / `chunk_len` index into is not visible
# here (presumably rows of the molecules h5) -- confirm.
stage NORMALIZE_DEPTH(
    in  map     gem_group_index,
    in  h5      molecules,
    in  string  normalization_mode,
    in  map     gem_group_barcode_ranges,
    in  float   targeted_depth_factor,
    out h5[]    raw_matrices_h5,
    out int     raw_nnz,
    out h5[]    filtered_matrices_h5,
    out int     filtered_nnz,
    out json    summary,
    src py      "../rna/stages/aggregator/normalize_depth",
) split (
    in  float[] frac_reads_kept,
    in  int[]   num_cells,
    in  int     chunk_start,
    in  int     chunk_len,
    out json    chunk_summary,
    out h5      raw_matrix_h5,
    out h5      filtered_matrix_h5,
)

# Stage WRITE_MATRICES
#
# Takes the matrix lists, nnz counts and summary that NORMALIZE_DEPTH
# produces (as wired in SC_RNA_AGGREGATOR), plus sample definitions, the
# gem-group index and the molecules h5. Writes one raw and one filtered
# matrix, each as h5 and as a MEX directory, along with a barcode summary h5
# and a summary JSON.
# `summary` is both an input and an output.
# Declares a split clause with no chunk-specific parameters.
stage WRITE_MATRICES(
    in  map[] sample_defs,
    in  map   gem_group_index,
    in  h5    molecules,
    in  h5[]  raw_matrices_h5,
    in  int   raw_nnz,
    in  h5[]  filtered_matrices_h5,
    in  int   filtered_nnz,
    in  json  summary,
    out h5    raw_matrix_h5,
    out path  raw_matrix_mex,
    out h5    filtered_matrix_h5,
    out path  filtered_matrix_mex,
    out h5    barcode_summary_h5,
    out json  summary,
    src py    "../rna/stages/aggregator/write_matrices",
) split (
)

# Stage CHECK_INVARIANTS
#
# Takes the input sample definitions and the merged raw matrix h5 and emits
# a summary JSON.
# Presumably it verifies that aggregation preserved expected properties of
# the inputs; the actual checks live in the Python code -- TODO confirm.
# Declares a split clause with no chunk-specific parameters.
stage CHECK_INVARIANTS(
    in  map[] input_sample_defs,
    in  h5    merged_raw_gene_bc_matrices_h5,
    out json  summary,
    src py    "../rna/stages/aggregator/check_invariants",
) split (
)

# Stage SUMMARIZE_AGGREGATED_REPORTS
#
# Final reporting stage of SC_RNA_AGGREGATOR. Combines the normalize-depth
# and analyzer summary JSONs with the filtered matrix, barcode summary and
# analysis directory into a summary JSON, an HTML web summary and a web
# summary data JSON.
# Note: the implementing module is named `summarize_reports`, which differs
# from the stage name.
# Declares a split clause with no chunk-specific parameters.
stage SUMMARIZE_AGGREGATED_REPORTS(
    in  string sample_id,
    in  string sample_desc,
    in  map    gem_group_index,
    in  h5     filtered_matrices_h5,
    in  h5     barcode_summary_h5,
    in  path   analysis,
    in  json   normalize_depth_summary,
    in  json   analyze_matrices_summary,
    in  string product_type,
    out json   summary,
    out html   web_summary,
    out json   web_summary_data,
    src py     "../rna/stages/aggregator/summarize_reports",
) split (
)

#
# @include "sc_rna_aggregator.mro"
#

# Pipeline SC_RNA_AGGREGATOR
#
# Aggregates several samples into one matrix, runs secondary analysis on it
# and produces summary reports. Call order:
#
#   AGGREGATOR_PREFLIGHT (preflight)
#   SETUP_SAMPLES -> MERGE_MOLECULES -> NORMALIZE_DEPTH -> WRITE_MATRICES
#     -> SC_RNA_ANALYZER -> SUMMARIZE_AGGREGATED_REPORTS
#
# Inputs:
#   sample_id, sample_desc  - forwarded to the report stage
#   sample_defs             - list of per-sample definition maps
#   normalization_mode      - forwarded to preflight and NORMALIZE_DEPTH
#   no_secondary_analysis   - forwarded to SC_RNA_ANALYZER
#   num_*, cbc_*, max_clusters, graphclust_*, neighbor_*, tsne_*, random_seed
#                           - analysis tuning parameters, forwarded as-is
#   product_type            - forwarded to the report stage
#
# This pipeline exposes no UMAP, gene/barcode subset or force_cells inputs;
# the corresponding SC_RNA_ANALYZER arguments are bound to null below.
pipeline SC_RNA_AGGREGATOR(
    in  string sample_id,
    in  string sample_desc,
    in  map[]  sample_defs,
    in  string normalization_mode,
    in  bool   no_secondary_analysis,
    in  int    num_analysis_bcs,
    in  int    num_pca_bcs,
    in  int    num_pca_genes,
    in  int    num_principal_comps,
    in  int    cbc_knn,
    in  float  cbc_alpha,
    in  float  cbc_sigma,
    in  bool   cbc_realign_panorama,
    in  int    max_clusters,
    in  int    graphclust_neighbors,
    in  float  neighbor_a,
    in  float  neighbor_b,
    in  int    tsne_perplexity,
    in  int    tsne_input_pcs,
    in  int    random_seed,
    in  int    tsne_max_dims,
    in  int    tsne_max_iter,
    in  int    tsne_stop_lying_iter,
    in  int    tsne_mom_switch_iter,
    in  float  tsne_theta,
    in  string product_type,
    out h5     raw_gene_bc_matrices_h5,
    out path   raw_gene_bc_matrices_mex,
    out h5     filtered_gene_bc_matrices_h5,
    out path   filtered_gene_bc_matrices_mex,
    out h5     molecule_info,
    out path   analysis,
    out path   analysis_csv,
    out json   analysis_summary,
    out json   summary,
    out html   web_summary,
    out json   web_summary_data,
    out map    gem_group_index,
    out json   gem_group_index_json,
)
{
    # Marked as a preflight call; it has no outputs to bind.
    call AGGREGATOR_PREFLIGHT(
        sample_defs        = self.sample_defs,
        normalization_mode = self.normalization_mode,
    ) using (
        preflight = true,
    )

    # Source of gem_group_index, libraries and the chemistry batch
    # correction flag used further down.
    call SETUP_SAMPLES(
        sample_defs = self.sample_defs,
    ) using (
        volatile = true,
    )

    # `merged_molecules` is also returned from this pipeline as
    # `molecule_info`.
    call MERGE_MOLECULES(
        sample_defs = self.sample_defs,
        libraries   = SETUP_SAMPLES.libraries,
    ) using (
        volatile = true,
    )

    # targeted_depth_factor is hard-coded to 2 here rather than exposed as a
    # pipeline input.
    call NORMALIZE_DEPTH(
        gem_group_index          = SETUP_SAMPLES.gem_group_index,
        normalization_mode       = self.normalization_mode,
        molecules                = MERGE_MOLECULES.merged_molecules,
        gem_group_barcode_ranges = MERGE_MOLECULES.gem_group_barcode_ranges,
        targeted_depth_factor    = 2,
    )

    # Consolidates the NORMALIZE_DEPTH matrix lists into single raw and
    # filtered matrices.
    call WRITE_MATRICES(
        sample_defs          = self.sample_defs,
        gem_group_index      = SETUP_SAMPLES.gem_group_index,
        molecules            = MERGE_MOLECULES.merged_molecules,
        raw_matrices_h5      = NORMALIZE_DEPTH.raw_matrices_h5,
        filtered_matrices_h5 = NORMALIZE_DEPTH.filtered_matrices_h5,
        raw_nnz              = NORMALIZE_DEPTH.raw_nnz,
        filtered_nnz         = NORMALIZE_DEPTH.filtered_nnz,
        summary              = NORMALIZE_DEPTH.summary,
    )

    # Secondary analysis on the aggregated filtered matrix. Batch correction
    # is decided by SETUP_SAMPLES, not by a pipeline input. All umap_*
    # arguments, use_genes/exclude_genes/use_bcs and force_cells are null,
    # and multi-genome analysis is never skipped from here.
    call SC_RNA_ANALYZER(
        no_secondary_analysis      = self.no_secondary_analysis,
        filtered_matrices_h5       = WRITE_MATRICES.filtered_matrix_h5,
        num_analysis_bcs           = self.num_analysis_bcs,
        num_pca_bcs                = self.num_pca_bcs,
        num_pca_genes              = self.num_pca_genes,
        num_principal_comps        = self.num_principal_comps,
        aggr_library_info          = SETUP_SAMPLES.libraries,
        chemistry_batch_correction = SETUP_SAMPLES.chemistry_batch_correction,
        cbc_knn                    = self.cbc_knn,
        cbc_alpha                  = self.cbc_alpha,
        cbc_sigma                  = self.cbc_sigma,
        cbc_realign_panorama       = self.cbc_realign_panorama,
        max_clusters               = self.max_clusters,
        graphclust_neighbors       = self.graphclust_neighbors,
        neighbor_a                 = self.neighbor_a,
        neighbor_b                 = self.neighbor_b,
        tsne_perplexity            = self.tsne_perplexity,
        tsne_input_pcs             = self.tsne_input_pcs,
        tsne_theta                 = self.tsne_theta,
        random_seed                = self.random_seed,
        use_genes                  = null,
        exclude_genes              = null,
        use_bcs                    = null,
        tsne_max_dims              = self.tsne_max_dims,
        tsne_max_iter              = self.tsne_max_iter,
        tsne_stop_lying_iter       = self.tsne_stop_lying_iter,
        tsne_mom_switch_iter       = self.tsne_mom_switch_iter,
        umap_n_neighbors           = null,
        umap_input_pcs             = null,
        umap_max_dims              = null,
        umap_min_dist              = null,
        umap_metric                = null,
        force_cells                = null,
        skip_multigenome_analysis  = false,
    )

    # normalize_depth_summary is taken from WRITE_MATRICES.summary (which
    # itself received NORMALIZE_DEPTH.summary as an input), not directly
    # from NORMALIZE_DEPTH.
    call SUMMARIZE_AGGREGATED_REPORTS(
        sample_id                = self.sample_id,
        sample_desc              = self.sample_desc,
        gem_group_index          = SETUP_SAMPLES.gem_group_index,
        filtered_matrices_h5     = WRITE_MATRICES.filtered_matrix_h5,
        barcode_summary_h5       = WRITE_MATRICES.barcode_summary_h5,
        analysis                 = SC_RNA_ANALYZER.analysis,
        normalize_depth_summary  = WRITE_MATRICES.summary,
        analyze_matrices_summary = SC_RNA_ANALYZER.summary,
        product_type             = self.product_type,
    )

    return (
        filtered_gene_bc_matrices_h5  = WRITE_MATRICES.filtered_matrix_h5,
        filtered_gene_bc_matrices_mex = WRITE_MATRICES.filtered_matrix_mex,
        raw_gene_bc_matrices_h5       = WRITE_MATRICES.raw_matrix_h5,
        raw_gene_bc_matrices_mex      = WRITE_MATRICES.raw_matrix_mex,
        analysis                      = SC_RNA_ANALYZER.analysis,
        analysis_csv                  = SC_RNA_ANALYZER.analysis_csv,
        analysis_summary              = SC_RNA_ANALYZER.summary,
        summary                       = SUMMARIZE_AGGREGATED_REPORTS.summary,
        web_summary                   = SUMMARIZE_AGGREGATED_REPORTS.web_summary,
        web_summary_data              = SUMMARIZE_AGGREGATED_REPORTS.web_summary_data,
        gem_group_index               = SETUP_SAMPLES.gem_group_index,
        gem_group_index_json          = SETUP_SAMPLES.gem_group_index_json,
        molecule_info                 = MERGE_MOLECULES.merged_molecules,
    )
}

#
# @include "_assign_tags_stages.mro"
#

# Stage CALL_TAGS_MARGINAL
#
# First tag-calling step of _ASSIGN_TAGS. From the filtered barcodes and the
# filtered feature-count matrix it produces per-cell marginal tag calls,
# marginal tag frequencies, a metrics JSON and tag contaminant info.
# NOTE(review): `src` here is "stages/feature/..." while
# SUMMARIZE_MULTIPLEXING_ANALYSIS uses "../rna/stages/feature/..."; whether
# both resolve to the same package depends on the search path -- confirm.
# Declares a split clause with no chunk-specific parameters.
stage CALL_TAGS_MARGINAL(
    in  csv  filtered_barcodes,
    in  h5   filtered_feature_counts_matrix,
    out csv  marginal_tag_calls_per_cell,
    out json marginal_tag_call_metrics_json,
    out csv  marginal_tag_frequencies,
    out json tag_contaminant_info,
    src py   "stages/feature/call_tags_marginal",
) split (
)

# Stage CALL_TAGS_JIBES
#
# Second tag-calling step of _ASSIGN_TAGS. Takes the marginal calls and
# frequencies from CALL_TAGS_MARGINAL together with the filtered barcodes,
# the feature-count matrix and the molecule info.
#
# Outputs:
#   jibes_parameters, jibes_model_summary, jibes_summary_data
#                                 - model-level artifacts
#   assignment_confidence_table, tag_calls_summary, tag_calls_per_cell,
#   cells_per_tag, non_tag_assignments
#                                 - per-cell / per-tag call results
#   tag_umi_thresholds_json/csv   - UMI thresholds in two formats
#   tag_call_metrics_json         - metrics, merged further in _ASSIGN_TAGS
#   tag_assigner_pickle           - pickled assigner object
#
# Declares a split clause with no chunk-specific parameters.
# NOTE(review): same "stages/feature/..." src prefix as CALL_TAGS_MARGINAL.
stage CALL_TAGS_JIBES(
    in  csv    marginal_tag_calls_per_cell,
    in  csv    marginal_tag_frequencies,
    in  csv    filtered_barcodes,
    in  h5     filtered_feature_counts_matrix,
    in  h5     molecule_info,
    out json   jibes_parameters,
    out csv    jibes_model_summary,
    out json   jibes_summary_data,
    out csv    assignment_confidence_table,
    out csv    tag_calls_summary,
    out csv    tag_calls_per_cell,
    out json   tag_call_metrics_json,
    out json   cells_per_tag,
    out json   tag_umi_thresholds_json,
    out csv    tag_umi_thresholds_csv,
    out pickle tag_assigner_pickle,
    out json   non_tag_assignments,
    src py     "stages/feature/call_tags_jibes",
) split (
)

# Stage SUMMARIZE_MULTIPLEXING_ANALYSIS
#
# Collects tag-calling result files (call summaries, per-cell calls, cells
# per tag, UMI thresholds, marginal frequencies, confidence table) into a
# single `multiplexing_analysis` output directory.
# Requests mem_gb = 4.
stage SUMMARIZE_MULTIPLEXING_ANALYSIS(
    in  csv  tag_calls_summary,
    in  csv  tag_calls_per_cell,
    in  json cells_per_tag,
    in  json tag_umi_thresholds_json,
    in  csv  tag_umi_thresholds_csv,
    in  csv  marginal_tag_frequencies,
    in  csv  assignment_confidence_table,
    out path multiplexing_analysis,
    src py   "../rna/stages/feature/summarize_multiplexing_analysis",
) using (
    mem_gb = 4,
)

# Stage DETERMINE_SAMPLE_ASSIGNMENTS
#
# Produces the barcode-to-sample assignment (`sample_barcodes_json`), a
# multiplet barcode list, a map of per-sample summary JSONs and an overall
# summary.
# Inputs are the cells-per-tag JSON, an optional-looking
# `force_sample_barcodes` JSON, the filtered barcodes, the multi config graph
# and the gem well number.
# NOTE(review): presumably `force_sample_barcodes` overrides the tag-derived
# assignment when provided -- confirm in the Python implementation.
# Declared `volatile = strict`.
stage DETERMINE_SAMPLE_ASSIGNMENTS(
    in  json      cells_per_tag,
    in  json      force_sample_barcodes,
    in  csv       filtered_barcodes,
    in  json      multi_graph,
    in  int       gem_well,
    out json      sample_barcodes_json,
    out json      multiplet_barcodes_json,
    out map<json> sample_summaries,
    out json      summary,
    src py        "../rna/stages/multi/determine_sample_assignments",
) using (
    volatile = strict,
)

# Stage COMPUTE_EXTRA_MULTIPLEXING_METRICS
#
# Computes an additional metrics summary JSON from the molecule info, the
# filtered feature-count matrix, the multi config graph and the sample /
# multiplet barcode JSONs (which _ASSIGN_TAGS takes from
# DETERMINE_SAMPLE_ASSIGNMENTS).
# Requests mem_gb = 4 and threads = 1; declared `volatile = strict`.
stage COMPUTE_EXTRA_MULTIPLEXING_METRICS(
    in  h5   molecule_info,
    in  h5   filtered_feature_counts_matrix,
    in  json multi_graph,
    in  json sample_barcodes,
    in  json multiplet_barcodes,
    out json summary,
    src py   "../rna/stages/multi/compute_extra_multiplexing_metrics",
) using (
    mem_gb   = 4,
    threads  = 1,
    volatile = strict,
)

#
# @include "_assign_tags.mro"
#

# Pipeline _ASSIGN_TAGS
#
# Tag calling and sample assignment. Flow:
#
#   CALL_TAGS_MARGINAL -> CALL_TAGS_JIBES
#     -> SUMMARIZE_MULTIPLEXING_ANALYSIS
#     -> DETERMINE_SAMPLE_ASSIGNMENTS -> COMPUTE_EXTRA_MULTIPLEXING_METRICS
#     -> MERGE_METRICS
#
# Everything is returned through the single `assign_tags_outs` struct.
# MERGE_METRICS and the AssignTagsOuts struct are declared elsewhere in this
# file.
pipeline _ASSIGN_TAGS(
    in  csv            filtered_barcodes,
    in  h5             filtered_feature_counts_matrix,
    in  h5             molecule_info,
    in  json           force_sample_barcodes,
    in  json           multi_graph,
    in  int            gem_well,
    out AssignTagsOuts assign_tags_outs,
)
{
    # Step 1: marginal tag calls and frequencies.
    call CALL_TAGS_MARGINAL(
        filtered_barcodes              = self.filtered_barcodes,
        filtered_feature_counts_matrix = self.filtered_feature_counts_matrix,
    )

    # Step 2: JIBES calling, seeded with the marginal results. Bindings are
    # listed in the same order as the stage declares its inputs.
    call CALL_TAGS_JIBES(
        marginal_tag_calls_per_cell    = CALL_TAGS_MARGINAL.marginal_tag_calls_per_cell,
        marginal_tag_frequencies       = CALL_TAGS_MARGINAL.marginal_tag_frequencies,
        filtered_barcodes              = self.filtered_barcodes,
        filtered_feature_counts_matrix = self.filtered_feature_counts_matrix,
        molecule_info                  = self.molecule_info,
    )

    # Bundles the call results into a directory. Nothing from this call is
    # referenced in the return struct below.
    call SUMMARIZE_MULTIPLEXING_ANALYSIS(
        tag_calls_summary           = CALL_TAGS_JIBES.tag_calls_summary,
        tag_calls_per_cell          = CALL_TAGS_JIBES.tag_calls_per_cell,
        cells_per_tag               = CALL_TAGS_JIBES.cells_per_tag,
        tag_umi_thresholds_json     = CALL_TAGS_JIBES.tag_umi_thresholds_json,
        tag_umi_thresholds_csv      = CALL_TAGS_JIBES.tag_umi_thresholds_csv,
        marginal_tag_frequencies    = CALL_TAGS_MARGINAL.marginal_tag_frequencies,
        assignment_confidence_table = CALL_TAGS_JIBES.assignment_confidence_table,
    )

    # Step 3: turn cells-per-tag into per-sample barcode assignments.
    call DETERMINE_SAMPLE_ASSIGNMENTS(
        cells_per_tag         = CALL_TAGS_JIBES.cells_per_tag,
        force_sample_barcodes = self.force_sample_barcodes,
        filtered_barcodes     = self.filtered_barcodes,
        multi_graph           = self.multi_graph,
        gem_well              = self.gem_well,
    )

    # Step 4: extra metrics that need the final sample/multiplet barcodes.
    call COMPUTE_EXTRA_MULTIPLEXING_METRICS(
        molecule_info                  = self.molecule_info,
        filtered_feature_counts_matrix = self.filtered_feature_counts_matrix,
        multi_graph                    = self.multi_graph,
        sample_barcodes                = DETERMINE_SAMPLE_ASSIGNMENTS.sample_barcodes_json,
        multiplet_barcodes             = DETERMINE_SAMPLE_ASSIGNMENTS.multiplet_barcodes_json,
    )

    # Folds the three metric JSONs into one; the merged result is what gets
    # exposed as `tag_call_metrics_json` in the output struct.
    call MERGE_METRICS(
        summaries = [
            CALL_TAGS_JIBES.tag_call_metrics_json,
            DETERMINE_SAMPLE_ASSIGNMENTS.summary,
            COMPUTE_EXTRA_MULTIPLEXING_METRICS.summary,
        ],
    )

    return (
        assign_tags_outs = {
            assignment_confidence_table: CALL_TAGS_JIBES.assignment_confidence_table,
            cells_per_tag:               CALL_TAGS_JIBES.cells_per_tag,
            jibes_model_summary:         CALL_TAGS_JIBES.jibes_model_summary,
            jibes_parameters:            CALL_TAGS_JIBES.jibes_parameters,
            jibes_summary_data:          CALL_TAGS_JIBES.jibes_summary_data,
            marginal_tag_frequencies:    CALL_TAGS_MARGINAL.marginal_tag_frequencies,
            non_tag_assignments:         CALL_TAGS_JIBES.non_tag_assignments,
            sample_assignment_metrics:   DETERMINE_SAMPLE_ASSIGNMENTS.sample_summaries,
            sample_barcodes_json:        DETERMINE_SAMPLE_ASSIGNMENTS.sample_barcodes_json,
            tag_assigner_pickle:         CALL_TAGS_JIBES.tag_assigner_pickle,
            tag_call_metrics_json:       MERGE_METRICS.summary,
            tag_calls_per_cell:          CALL_TAGS_JIBES.tag_calls_per_cell,
            tag_calls_summary:           CALL_TAGS_JIBES.tag_calls_summary,
            tag_contaminant_info:        CALL_TAGS_MARGINAL.tag_contaminant_info,
            tag_umi_thresholds_csv:      CALL_TAGS_JIBES.tag_umi_thresholds_csv,
            tag_umi_thresholds_json:     CALL_TAGS_JIBES.tag_umi_thresholds_json,
        },
    )
}

#
# @include "_basic_sc_rna_counter_stages.mro"
#

# Stage FILTER_BARCODES
#
# Takes a matrix h5 plus barcode-correction data, reference path, gem groups,
# chemistry description and a CellCalling config struct (declared elsewhere
# in this file). Emits the filtered barcode list, the filtered matrix as h5
# and as a MEX directory, an `aggregate_barcodes` CSV, a `nonambient_calls`
# CSV and a summary JSON.
# Presumably this is where cell calling happens, given the `CellCalling`
# config input -- TODO confirm in the Python implementation.
# Declares a split clause with no chunk-specific parameters; requests
# mem_gb = 8 and is `volatile = strict`.
stage FILTER_BARCODES(
    in  string      sample_id,
    in  h5          matrices_h5,
    in  csv         barcode_correction_csv,
    in  bool        is_antibody_only,
    in  path        reference_path,
    in  int[]       gem_groups,
    in  string      chemistry_description,
    in  CellCalling config,
    out json        summary,
    out csv         filtered_barcodes,
    out csv         aggregate_barcodes,
    out h5          filtered_matrices_h5,
    out path        filtered_matrices_mex,
    out csv         nonambient_calls,
    src py          "../rna/stages/counter/filter_barcodes",
) split (
) using (
    mem_gb   = 8,
    volatile = strict,
)

# Stage MULTI_WRITE_PER_SAMPLE_MATRICES
#
# Produces a list of SampleMatrices structs (type declared elsewhere in this
# file) from a matrix h5, a raw matrix h5, the filtered barcodes and the
# sample-to-barcodes JSON.
# Split stage: each chunk receives one `sample` name and its `barcodes`
# list; chunks declare no outputs of their own.
# Declared `volatile = strict`.
stage MULTI_WRITE_PER_SAMPLE_MATRICES(
    in  h5               matrix_h5,
    in  h5               raw_matrix_h5,
    in  csv              filtered_barcodes,
    in  json             sample_barcodes_json,
    out SampleMatrices[] sample_matrices,
    src py               "../rna/stages/multi/multi_write_per_sample_matrices",
) split (
    in  string           sample,
    in  string[]         barcodes,
) using (
    volatile = strict,
)

# Stage SUMMARIZE_BASIC_REPORTS
#
# Builds one summary JSON from a matrix, barcode lists/metrics, a barcode
# summary h5 and a list of upstream summary JSONs (`summary_jsons`).
# `cell_bcs_only` is set to false by _SLFE_CELLS_REPORTER, which describes
# that as the "library level" run using all barcodes.
# Declares a split clause with no chunk-specific parameters; declared
# `volatile = strict`.
stage SUMMARIZE_BASIC_REPORTS(
    in  h5     matrices_h5,
    in  csv    filtered_barcodes,
    in  csv    per_barcode_metrics,
    in  json   matrix_computer_summary,
    in  h5     barcode_summary,
    in  int    recovered_cells,
    in  path   reference_path,
    in  string barcode_whitelist,
    in  json[] summary_jsons,
    in  bool   cell_bcs_only,
    out json   summary,
    src py     "../rna/stages/counter/summarize_basic_reports",
) split (
) using (
    volatile = strict,
)

# Stage SUBSAMPLE_READS
#
# Takes a molecule info h5, the filtered barcodes and a `target_mode` string
# (the callers visible in this file pass null). Emits a summary JSON and a
# pickle of merged metrics.
# Split stage: chunks are addressed by `chunk_start` / `chunk_len`, receive a
# `subsample_info` list of maps and each emit a `metrics` pickle.
# Presumably the per-chunk pickles are combined into `merged_metrics` in the
# join phase -- TODO confirm in the Python implementation.
# Requests mem_gb = 4; declared `volatile = strict`.
stage SUBSAMPLE_READS(
    in  h5     molecule_info,
    in  csv    filtered_barcodes,
    in  string target_mode,
    out json   summary,
    out pickle merged_metrics,
    src py     "../rna/stages/counter/subsample_reads",
) split (
    in  int    chunk_start,
    in  int    chunk_len,
    in  map[]  subsample_info,
    out pickle metrics,
) using (
    mem_gb   = 4,
    volatile = strict,
)

# Stage DISABLE_BAMS
#
# Maps three boolean flags (`no_bam`, `disable_multi`, `is_pd`) to two
# booleans, `disable_legacy_bam` and `disable_sample_bams`.
# Presumably those outputs are used as `disabled` modifiers on BAM-writing
# calls elsewhere; no such call is visible in this part of the file.
# Declared `volatile = strict`.
stage DISABLE_BAMS(
    in  bool no_bam,
    in  bool disable_multi,
    in  bool is_pd,
    out bool disable_legacy_bam,
    out bool disable_sample_bams,
    src py   "../rna/stages/multi/disable_bams",
) using (
    volatile = strict,
)

#
# @include "_slfe_cells_reporter.mro"
#

# Pipeline _SLFE_CELLS_REPORTER
#
# Writes the molecule info file, subsamples reads from it and assembles the
# basic summary report. Call order:
#
#   WRITE_MOLECULE_INFO -> SUBSAMPLE_READS -> SUMMARIZE_BASIC_REPORTS
#
# WRITE_MOLECULE_INFO is declared elsewhere in this file; its
# `single_mol_info` output is used below through the `h5_file` and `summary`
# members.
#
# Outputs:
#   summary                    - from SUMMARIZE_BASIC_REPORTS
#   molecule_info              - WRITE_MOLECULE_INFO.single_mol_info.h5_file
#   merged_subsampling_metrics - SUBSAMPLE_READS.merged_metrics
pipeline _SLFE_CELLS_REPORTER(
    in  string      barcode_whitelist,
    in  path        reference_path,
    in  int         recovered_cells,
    in  int         force_cells,
    in  frf.bincode slfe_feature_reference,
    in  json        target_panel_summary,
    in  string      target_set_name,
    in  h5          matrices_h5,
    in  map[]       read_chunks,
    in  int         gem_well,
    in  bui[]       report_mol_inputs,
    in  json        matrix_computer_summary,
    in  h5          barcode_summary,
    in  csv         filtered_barcodes,
    in  json        filter_barcodes_summary,
    in  csv         per_barcode_metrics,
    in  bool        include_introns,
    in  string      multi_config_sha,
    in  bi.bincode  barcode_index,
    out json        summary,
    out h5          molecule_info,
    out pickle      merged_subsampling_metrics,
)
{
    # The pipeline input `report_mol_inputs` is bound to the stage's
    # `counts_bc_order` parameter. The per-sample arguments
    # (sample_barcodes_json, per_sample_metrics) are null in this pipeline.
    call WRITE_MOLECULE_INFO(
        gem_well                = self.gem_well,
        counts_bc_order         = self.report_mol_inputs,
        reference_path          = self.reference_path,
        read_chunks             = self.read_chunks,
        feature_reference       = self.slfe_feature_reference,
        target_panel_summary    = self.target_panel_summary,
        target_set_name         = self.target_set_name,
        matrix_computer_summary = self.matrix_computer_summary,
        recovered_cells         = self.recovered_cells,
        force_cells             = self.force_cells,
        filtered_barcodes       = self.filtered_barcodes,
        include_introns         = self.include_introns,
        multi_config_sha        = self.multi_config_sha,
        sample_barcodes_json    = null,
        per_sample_metrics      = null,
        barcode_index           = self.barcode_index,
    )

    # Subsamples from the molecule info file written just above;
    # target_mode is left null.
    call SUBSAMPLE_READS(
        molecule_info     = WRITE_MOLECULE_INFO.single_mol_info.h5_file,
        filtered_barcodes = self.filtered_barcodes,
        target_mode       = null,
    ) using (
        volatile = true,
    )

    # matrix_computer_summary is passed both as its own argument and as the
    # first element of summary_jsons.
    call SUMMARIZE_BASIC_REPORTS(
        matrices_h5             = self.matrices_h5,
        filtered_barcodes       = self.filtered_barcodes,
        per_barcode_metrics     = self.per_barcode_metrics,
        matrix_computer_summary = self.matrix_computer_summary,
        barcode_summary         = self.barcode_summary,
        recovered_cells         = self.recovered_cells,
        reference_path          = self.reference_path,
        barcode_whitelist       = self.barcode_whitelist,
        # This call runs at the library level, so all barcodes are used
        # rather than only cell barcodes.
        cell_bcs_only           = false,
        summary_jsons           = [
            self.matrix_computer_summary,
            SUBSAMPLE_READS.summary,
            WRITE_MOLECULE_INFO.single_mol_info.summary,
            self.filter_barcodes_summary,
        ],
    )

    return (
        summary                    = SUMMARIZE_BASIC_REPORTS.summary,
        molecule_info              = WRITE_MOLECULE_INFO.single_mol_info.h5_file,
        merged_subsampling_metrics = SUBSAMPLE_READS.merged_metrics,
    )
}

# Cells reporter for sliced samples. Unlike the library-level reporter it
# contains no WRITE_MOLECULE_INFO call: the molecule info is taken from the
# `molecule_info` input instead of being written here.
#
# Outputs:
#   summary                    - SUMMARIZE_BASIC_REPORTS.summary
#   merged_subsampling_metrics - SUBSAMPLE_READS.merged_metrics
pipeline _SAMPLE_CELLS_REPORTER(
    in  h5     molecule_info,
    in  string barcode_whitelist,
    in  path   reference_path,
    in  int    recovered_cells,
    in  h5     matrices_h5,
    in  json   matrix_computer_summary,
    in  csv    filtered_barcodes,
    in  csv    per_barcode_metrics,
    in  h5     barcode_summary,
    in  json   sample_assignment_metrics,
    in  json   count_analyzer_metrics,
    in  json   crispr_analyzer_metrics,
    in  json   targeted_analyzer_metrics,
    in  json   target_features_metrics,
    out json   summary,
    out pickle merged_subsampling_metrics,
)
{
    # Runs on the caller-supplied molecule info; target_mode is left unset.
    # The call is declared volatile, so its files are eligible for Martian's
    # volatile-data cleanup once they are no longer needed.
    call SUBSAMPLE_READS(
        molecule_info     = self.molecule_info,
        filtered_barcodes = self.filtered_barcodes,
        target_mode       = null,
    ) using (
        volatile = true,
    )

    # Produces the pipeline's `summary` output. summary_jsons collects the
    # matrix computer summary, the subsampling summary and the five
    # per-sample metric JSONs passed in by the caller.
    call SUMMARIZE_BASIC_REPORTS(
        matrices_h5             = self.matrices_h5,
        filtered_barcodes       = self.filtered_barcodes,
        per_barcode_metrics     = self.per_barcode_metrics,
        matrix_computer_summary = self.matrix_computer_summary,
        barcode_summary         = self.barcode_summary,
        recovered_cells         = self.recovered_cells,
        reference_path          = self.reference_path,
        barcode_whitelist       = self.barcode_whitelist,
        # we want "all reads" etc to include only those with sample barcodes.
        cell_bcs_only           = true,
        summary_jsons           = [
            self.matrix_computer_summary,
            SUBSAMPLE_READS.summary,
            self.sample_assignment_metrics,
            self.count_analyzer_metrics,
            self.crispr_analyzer_metrics,
            self.targeted_analyzer_metrics,
            self.target_features_metrics,
        ],
    )

    return (
        summary                    = SUMMARIZE_BASIC_REPORTS.summary,
        merged_subsampling_metrics = SUBSAMPLE_READS.merged_metrics,
    )
}

#
# @include "_slfe_partial_first_pass.mro"
#

# First pass that yields the targeted UMI read-count threshold.
#
# Dataflow: SUBSAMPLE_BARCODES picks a barcode subset from the corrected
# barcode counts; ALIGN_AND_COUNT (aliased INITIAL_ALIGN_AND_COUNT) is run
# with that subset; its bc_umi_info is handed to SET_TARGETED_UMI_FILTER,
# whose threshold and summary become this pipeline's outputs.
#
# Inputs copied from _cr_lib_stages. Cleaner way to do this?
pipeline _SLFE_PARTIAL_FIRST_PASS(
    in  int          gem_well,
    in  map[]        read_chunks,
    in  path         reference_path,
    in  ReadShards   read_shards,
    in  fbc.bincode  feature_counts,
    in  frf.bincode  feature_reference,
    in  csv          target_set,
    in  ChemistryDef chemistry_def,
    in  bool         include_introns,
    in  string       aligner,
    in  bool         is_pd,
    in  int          trim_polya_min_score,
    in  int          trim_tso_min_score,
    in  tbcc.bincode total_barcode_counts,
    in  bcc.bincode  corrected_barcode_counts,
    out int          umi_read_count_threshold,
    out json         umi_filtering_summary,
)
{
    call SUBSAMPLE_BARCODES(
        corrected_barcode_counts = self.corrected_barcode_counts,
    )

    # Same stage as the main ALIGN_AND_COUNT call in _SLFE_MATRIX_COMPUTER,
    # but with different fixed bindings:
    #   - aligner_subsample_rate is null (no rate is passed),
    #   - targeted_umi_min_read_count is null (the threshold is what this
    #     pipeline computes, via SET_TARGETED_UMI_FILTER below),
    #   - barcode_subset comes from SUBSAMPLE_BARCODES instead of null.
    call ALIGN_AND_COUNT as INITIAL_ALIGN_AND_COUNT(
        gem_well                    = self.gem_well,
        read_chunks                 = self.read_chunks,
        reference_path              = self.reference_path,
        read_shards                 = self.read_shards,
        feature_counts              = self.feature_counts,
        feature_reference           = self.feature_reference,
        target_set                  = self.target_set,
        chemistry_def               = self.chemistry_def,
        include_introns             = self.include_introns,
        aligner                     = self.aligner,
        aligner_subsample_rate      = null,
        is_pd                       = self.is_pd,
        transcriptome_min_score     = 30,
        trim_polya_min_score        = self.trim_polya_min_score,
        trim_tso_min_score          = self.trim_tso_min_score,
        targeted_umi_min_read_count = null,
        total_barcode_counts        = self.total_barcode_counts,
        barcode_subset              = SUBSAMPLE_BARCODES.barcode_subset,
    )

    # Only bc_umi_info from the first pass is consumed; the other
    # INITIAL_ALIGN_AND_COUNT outputs are not referenced in this pipeline.
    call SET_TARGETED_UMI_FILTER(
        bc_umi_info       = INITIAL_ALIGN_AND_COUNT.bc_umi_info,
        feature_reference = self.feature_reference,
    )

    return (
        umi_read_count_threshold = SET_TARGETED_UMI_FILTER.umi_read_count_threshold,
        umi_filtering_summary    = SET_TARGETED_UMI_FILTER.summary,
    )
}

#
# @include "_slfe_matrix_computer.mro"
#

# Packs the three shard lists into one ReadShards struct so that they can be
# passed to other calls as a single value. The pipeline makes no calls; its
# output is assembled directly from its inputs, field name == input name.
pipeline MAKE_READ_SHARDS_STRUCT(
    in  shard[]    valid_reads,
    in  shard[]    corrected_reads,
    in  shard[]    invalid_reads,
    out ReadShards read_shards,
)
{
    return (
        read_shards = {
            corrected_reads: self.corrected_reads,
            invalid_reads:   self.invalid_reads,
            valid_reads:     self.valid_reads,
        },
    )
}

# Produces the raw feature-barcode matrix (H5 and MEX), barcode summary,
# barcode index and merged metrics for a single gem well.
#
# Call sequence:
#   MAKE_SHARD -> BARCODE_CORRECTION -> MAKE_READ_SHARDS_STRUCT
#   -> _SLFE_PARTIAL_FIRST_PASS (disabled when disable_target_umi_filter)
#   -> SET_ALIGNER_SUBSAMPLE_RATE -> ALIGN_AND_COUNT
#   -> COLLATE_METRICS, WRITE_BARCODE_INDEX, WRITE_BARCODE_SUMMARY,
#      WRITE_H5_MATRIX, WRITE_MATRIX_MARKET, MERGE_METRICS
pipeline _SLFE_MATRIX_COMPUTER(
    in  string            sample_id,
    in  ChemistryDef      chemistry_def,
    in  json              barcodes_under_tissue,
    in  bool              is_pd,
    in  map[]             chunks,
    in  path              reference_path,
    in  string[]          libraries_to_translate,
    in  float             subsample_rate,
    in  int               initial_reads,
    in  int               r1_length,
    in  int               r2_length,
    in  int               trim_polya_min_score,
    in  int               trim_tso_min_score,
    in  csv               feature_reference,
    in  csv               target_features,
    in  csv               target_set,
    in  string            target_set_name,
    in  bool              include_introns,
    in  string            aligner,
    in  bool              disable_target_umi_filter,
    in  int               rps_limit,
    # Note: _SLFE_MATRIX_COMPUTER processes data from a single gem well.
    in  int               gem_well,
    out frf.bincode       slfe_feature_reference,
    out csv               barcode_correction_csv,
    out h5                barcode_summary,
    out h5                raw_gene_bc_matrices_h5,
    out path              raw_gene_bc_matrices_mex,
    out ReadShards        read_shards,
    out bui[]             report_mol_inputs,
    out json              summary,
    out ann.bincode.lz4[] annotation_files,
    out csv               per_barcode_metrics,
    out bmsf[]            per_barcode_metrics_shard,
    out bui[]             bc_umi_info,
    out path              bam_header,
    out asf[]             alignments,
    out SampleMetrics[]   multi_metrics,
    out json              gem_well_alignment_metrics,
    out bi.bincode        barcode_index,
    out smf.json          sequencing_metrics,
)
{
    # Several pipeline inputs are bound under different stage parameter
    # names here: chunks -> read_chunks, initial_reads -> initial_read_pairs,
    # feature_reference (csv) -> feature_reference_path.
    # MAKE_SHARD.feature_reference is the reference used by every later
    # call and is returned as slfe_feature_reference.
    call MAKE_SHARD(
        gem_well               = self.gem_well,
        chemistry_def          = self.chemistry_def,
        read_chunks            = self.chunks,
        r1_length              = self.r1_length,
        r2_length              = self.r2_length,
        subsample_rate         = self.subsample_rate,
        initial_read_pairs     = self.initial_reads,
        reference_path         = self.reference_path,
        target_features        = self.target_features,
        target_set_name        = self.target_set_name,
        feature_reference_path = self.feature_reference,
        libraries_to_translate = self.libraries_to_translate,
    )

    # Consumes MAKE_SHARD's barcode counts and invalid shards; its
    # valid_corrected / invalid shards and total / corrected barcode counts
    # feed the calls below.
    call BARCODE_CORRECTION(
        gem_well               = self.gem_well,
        barcode_counts         = MAKE_SHARD.barcode_counts,
        barcode_segment_counts = MAKE_SHARD.barcode_segment_counts,
        chemistry_def          = self.chemistry_def,
        invalid_uncorrected    = MAKE_SHARD.invalid,
        valid_read_metrics     = MAKE_SHARD.bc_correct_summary,
        libraries_to_translate = self.libraries_to_translate,
    )

    # Bundle the three shard lists into the ReadShards struct that both
    # alignment passes take as a single argument.
    call MAKE_READ_SHARDS_STRUCT(
        valid_reads     = MAKE_SHARD.valid,
        corrected_reads = BARCODE_CORRECTION.valid_corrected,
        invalid_reads   = BARCODE_CORRECTION.invalid,
    )

    # Optional first pass that supplies umi_read_count_threshold to the main
    # ALIGN_AND_COUNT call. It is disabled when disable_target_umi_filter is
    # true; under Martian's disabled-call semantics its outputs are then
    # null, so both the threshold below and the umi_filtering_summary entry
    # passed to MERGE_METRICS are null in that case.
    call _SLFE_PARTIAL_FIRST_PASS(
        gem_well                 = self.gem_well,
        read_chunks              = self.chunks,
        reference_path           = self.reference_path,
        read_shards              = MAKE_READ_SHARDS_STRUCT.read_shards,
        feature_counts           = MAKE_SHARD.feature_counts,
        feature_reference        = MAKE_SHARD.feature_reference,
        target_set               = self.target_set,
        chemistry_def            = self.chemistry_def,
        include_introns          = self.include_introns,
        aligner                  = self.aligner,
        is_pd                    = self.is_pd,
        trim_polya_min_score     = self.trim_polya_min_score,
        trim_tso_min_score       = self.trim_tso_min_score,
        total_barcode_counts     = BARCODE_CORRECTION.total_barcode_counts,
        corrected_barcode_counts = BARCODE_CORRECTION.corrected_barcode_counts,
    ) using (
        disabled = self.disable_target_umi_filter,
    )

    # Supplies aligner_subsample_rate for the main alignment pass.
    call SET_ALIGNER_SUBSAMPLE_RATE(
        rps_limit                = self.rps_limit,
        barcodes_under_tissue    = self.barcodes_under_tissue,
        corrected_barcode_counts = BARCODE_CORRECTION.corrected_barcode_counts,
    )

    # Main alignment/counting pass. Differs from the first pass in that no
    # barcode subset is passed (barcode_subset = null) and the subsample
    # rate and UMI threshold come from the two calls above.
    call ALIGN_AND_COUNT(
        gem_well                    = self.gem_well,
        read_chunks                 = self.chunks,
        reference_path              = self.reference_path,
        read_shards                 = MAKE_READ_SHARDS_STRUCT.read_shards,
        feature_counts              = MAKE_SHARD.feature_counts,
        feature_reference           = MAKE_SHARD.feature_reference,
        target_set                  = self.target_set,
        chemistry_def               = self.chemistry_def,
        include_introns             = self.include_introns,
        aligner                     = self.aligner,
        aligner_subsample_rate      = SET_ALIGNER_SUBSAMPLE_RATE.aligner_subsample_rate,
        is_pd                       = self.is_pd,
        transcriptome_min_score     = 30,
        trim_polya_min_score        = self.trim_polya_min_score,
        trim_tso_min_score          = self.trim_tso_min_score,
        targeted_umi_min_read_count = _SLFE_PARTIAL_FIRST_PASS.umi_read_count_threshold,
        total_barcode_counts        = BARCODE_CORRECTION.total_barcode_counts,
        barcode_subset              = null,
    )

    # Collates the per-barcode metric shards from ALIGN_AND_COUNT. No sample
    # barcodes are passed at this level (sample_barcodes_json = null).
    call COLLATE_METRICS(
        per_barcode_metrics  = ALIGN_AND_COUNT.per_barcode_metrics,
        reference_path       = self.reference_path,
        feature_reference    = MAKE_SHARD.feature_reference,
        sample_barcodes_json = null,
    )

    # The barcode index produced here is shared by the three writer calls
    # below and is also returned to the caller.
    call WRITE_BARCODE_INDEX(
        barcode_counts        = BARCODE_CORRECTION.corrected_barcode_counts,
        barcodes_under_tissue = self.barcodes_under_tissue,
    )

    # unique_gem_groups is a one-element list: this pipeline handles exactly
    # one gem well.
    call WRITE_BARCODE_SUMMARY(
        unique_gem_groups = [self.gem_well],
        bc_umi_info       = ALIGN_AND_COUNT.bc_umi_info,
        feature_reference = MAKE_SHARD.feature_reference,
        barcode_index     = WRITE_BARCODE_INDEX.barcode_index,
    )

    # H5 and Matrix Market writers both read ALIGN_AND_COUNT.counts_bc_order.
    call WRITE_H5_MATRIX(
        gem_well          = self.gem_well,
        counts            = ALIGN_AND_COUNT.counts_bc_order,
        feature_reference = MAKE_SHARD.feature_reference,
        chemistry_def     = self.chemistry_def,
        sample_id         = self.sample_id,
        barcode_index     = WRITE_BARCODE_INDEX.barcode_index,
    )

    call WRITE_MATRIX_MARKET(
        counts            = ALIGN_AND_COUNT.counts_bc_order,
        feature_reference = MAKE_SHARD.feature_reference,
        barcode_index     = WRITE_BARCODE_INDEX.barcode_index,
    )

    # Combines the summaries of the upstream calls into the pipeline-level
    # `summary` output.
    call MERGE_METRICS(
        summaries = [
            MAKE_SHARD.summary,
            BARCODE_CORRECTION.summary,
            _SLFE_PARTIAL_FIRST_PASS.umi_filtering_summary,
            COLLATE_METRICS.summary,
        ],
    )

    # Notes on the bindings below:
    #   - barcode_correction_csv is ALIGN_AND_COUNT.barcode_summary, whereas
    #     barcode_summary is WRITE_BARCODE_SUMMARY.barcode_summary.
    #   - report_mol_inputs and bc_umi_info are the same value
    #     (ALIGN_AND_COUNT.bc_umi_info) exposed under two names.
    #   - per_barcode_metrics is the collated CSV; per_barcode_metrics_shard
    #     is the uncollated ALIGN_AND_COUNT output.
    #   - gem_well_alignment_metrics is COLLATE_METRICS.summary on its own;
    #     the same JSON is also one of the inputs merged into `summary`.
    return (
        barcode_correction_csv     = ALIGN_AND_COUNT.barcode_summary,
        barcode_summary            = WRITE_BARCODE_SUMMARY.barcode_summary,
        raw_gene_bc_matrices_h5    = WRITE_H5_MATRIX.matrix,
        raw_gene_bc_matrices_mex   = WRITE_MATRIX_MARKET.feature_bc_matrix,
        read_shards                = MAKE_READ_SHARDS_STRUCT.read_shards,
        report_mol_inputs          = ALIGN_AND_COUNT.bc_umi_info,
        summary                    = MERGE_METRICS.summary,
        slfe_feature_reference     = MAKE_SHARD.feature_reference,
        annotation_files           = ALIGN_AND_COUNT.annotation_files,
        per_barcode_metrics        = COLLATE_METRICS.per_barcode_metrics,
        per_barcode_metrics_shard  = ALIGN_AND_COUNT.per_barcode_metrics,
        bc_umi_info                = ALIGN_AND_COUNT.bc_umi_info,
        bam_header                 = ALIGN_AND_COUNT.bam_header,
        alignments                 = ALIGN_AND_COUNT.pos_sorted,
        multi_metrics              = COLLATE_METRICS.multi_metrics,
        gem_well_alignment_metrics = COLLATE_METRICS.summary,
        barcode_index              = WRITE_BARCODE_INDEX.barcode_index,
        sequencing_metrics         = MAKE_SHARD.sequencing_metrics,
    )
}

#
# @include "_basic_sc_rna_counter.mro"
#

# Counter pipeline for a single gem well.
#
# Library-level part (always runs):
#   _SLFE_MATRIX_COMPUTER (as _MATRIX_COMPUTER) -> FILTER_BARCODES
#   -> DISABLE_BAMS -> WRITE_POS_BAM -> _SLFE_CELLS_REPORTER
#   (as _CELLS_REPORTER) -> _ASSIGN_TAGS -> MERGE_METRICS
# Per-sample ("sliced") part:
#   MULTI_WRITE_PER_SAMPLE_MATRICES, MULTI_COLLATE_PER_SAMPLE_METRICS and
#   MULTI_WRITE_PER_SAMPLE_MOLECULE_INFO are disabled by disable_multi;
#   MULTI_WRITE_PER_SAMPLE_BAM is disabled by
#   DISABLE_BAMS.disable_sample_bams.
pipeline _BASIC_SC_RNA_COUNTER(
    in  int                  gem_well,
    in  string               sample_id,
    in  ChemistryDef         chemistry_def,
    in  string               barcode_whitelist,
    in  bool                 is_antibody_only,
    in  bool                 is_pd,
    in  map[]                chunks,
    in  path                 reference_path,
    in  CellCalling          cell_calling_config,
    in  string[]             libraries_to_translate,
    in  float                subsample_rate,
    in  int                  initial_reads,
    in  int                  r1_length,
    in  int                  r2_length,
    in  int                  trim_polya_min_score,
    in  int                  trim_tso_min_score,
    in  csv                  feature_reference,
    in  csv                  target_features,
    in  csv                  target_set,
    in  string               target_set_name,
    in  json                 target_panel_summary,
    in  bool                 include_introns,
    in  string               aligner,
    in  bool                 disable_target_umi_filter,
    in  string               multi_config_sha,
    in  bool                 no_bam,
    in  int                  rps_limit,
    in  json                 force_sample_barcodes,
    in  bool                 disable_multi,
    in  json                 multi_graph,
    out csv                  filtered_barcodes,
    out csv                  nonambient_cell_calls,
    out csv                  barcode_correction_csv,
    out bam                  possorted_genome_bam,
    out bam.bai              possorted_genome_bam_index,
    out json                 summary,
    out h5                   barcode_summary,
    out h5                   molecule_info,
    out h5                   raw_gene_bc_matrices_h5,
    out path                 raw_gene_bc_matrices_mex,
    out h5                   filtered_gene_bc_matrices_h5,
    out path                 filtered_gene_bc_matrices_mex,
    out int[]                gem_groups,
    out ReadShards           read_shards,
    out ann.bincode.lz4[]    annotation_files,
    out smf.json             sequencing_metrics,
    # subset of summary json, needed only for verifying correct sample metrics
    out json                 gem_well_alignment_metrics,
    # sliced outputs for multi
    out AssignTagsOuts       assign_tags,
    out SampleBamFile[]      multi_pos_sorted_bam,
    out SampleMoleculeInfo[] multi_molecule_info,
    out SampleMetrics[]      multi_metrics,
    out SampleMatrices[]     multi_matrices,
    out map<json>            sample_assignment_metrics,
    out json                 sample_barcodes_json,
    # everything below here is needed only for gem group merging
    out csv                  per_barcode_metrics,
    out bmsf[]               per_barcode_metrics_shard,
    out bui[]                bc_umi_info,
    out path                 bam_header,
    out asf[]                alignments,
    out string               barcode_whitelist,
    out map[]                read_chunks,
    out string               target_set_name,
    out frf.bincode          slfe_feature_reference,
    out pickle               merged_subsampling_metrics,
)
{
    # barcodes_under_tissue is taken from the cell-calling config
    # (cell_calling_config.cell_barcodes); every other binding forwards the
    # same-named pipeline input.
    call _SLFE_MATRIX_COMPUTER as _MATRIX_COMPUTER(
        gem_well                  = self.gem_well,
        sample_id                 = self.sample_id,
        chemistry_def             = self.chemistry_def,
        barcodes_under_tissue     = self.cell_calling_config.cell_barcodes,
        is_pd                     = self.is_pd,
        chunks                    = self.chunks,
        reference_path            = self.reference_path,
        libraries_to_translate    = self.libraries_to_translate,
        subsample_rate            = self.subsample_rate,
        initial_reads             = self.initial_reads,
        r1_length                 = self.r1_length,
        r2_length                 = self.r2_length,
        trim_polya_min_score      = self.trim_polya_min_score,
        trim_tso_min_score        = self.trim_tso_min_score,
        feature_reference         = self.feature_reference,
        target_features           = self.target_features,
        target_set                = self.target_set,
        target_set_name           = self.target_set_name,
        include_introns           = self.include_introns,
        aligner                   = self.aligner,
        disable_target_umi_filter = self.disable_target_umi_filter,
        rps_limit                 = self.rps_limit,
    )

    # Operates on the raw matrix from _MATRIX_COMPUTER; gem_groups is the
    # one-element list [gem_well]. Its filtered_barcodes / filtered matrices
    # are used by the reporter, tag assignment and per-sample calls below.
    call FILTER_BARCODES(
        sample_id              = self.sample_id,
        matrices_h5            = _MATRIX_COMPUTER.raw_gene_bc_matrices_h5,
        barcode_correction_csv = _MATRIX_COMPUTER.barcode_correction_csv,
        config                 = self.cell_calling_config,
        gem_groups             = [self.gem_well],
        chemistry_description  = self.chemistry_def.description,
        is_antibody_only       = self.is_antibody_only,
        reference_path         = self.reference_path,
    )

    # Wildcard binding: each DISABLE_BAMS input is bound to the same-named
    # input of this pipeline. Its disable_legacy_bam / disable_sample_bams
    # outputs control the two WRITE_POS_BAM calls.
    call DISABLE_BAMS(
        * = self,
    )

    # Library-level BAM: no sample barcodes are passed. Inputs not listed
    # explicitly are bound by name from _MATRIX_COMPUTER's outputs via the
    # wildcard.
    call WRITE_POS_BAM(
        target_set_name      = self.target_set_name,
        read_chunks          = self.chunks,
        sample_barcodes_json = null,
        no_bam               = DISABLE_BAMS.disable_legacy_bam,
        *                    = _MATRIX_COMPUTER,
    )

    # Provides the library-level molecule_info, summary and
    # merged_subsampling_metrics. recovered_cells and force_cells are read
    # from cell_calling_config.
    call _SLFE_CELLS_REPORTER as _CELLS_REPORTER(
        gem_well                = self.gem_well,
        barcode_whitelist       = self.barcode_whitelist,
        reference_path          = self.reference_path,
        recovered_cells         = self.cell_calling_config.recovered_cells,
        force_cells             = self.cell_calling_config.force_cells,
        slfe_feature_reference  = _MATRIX_COMPUTER.slfe_feature_reference,
        target_panel_summary    = self.target_panel_summary,
        target_set_name         = self.target_set_name,
        matrices_h5             = _MATRIX_COMPUTER.raw_gene_bc_matrices_h5,
        read_chunks             = self.chunks,
        report_mol_inputs       = _MATRIX_COMPUTER.report_mol_inputs,
        matrix_computer_summary = _MATRIX_COMPUTER.summary,
        barcode_summary         = _MATRIX_COMPUTER.barcode_summary,
        filtered_barcodes       = FILTER_BARCODES.filtered_barcodes,
        filter_barcodes_summary = FILTER_BARCODES.summary,
        per_barcode_metrics     = _MATRIX_COMPUTER.per_barcode_metrics,
        include_introns         = self.include_introns,
        multi_config_sha        = self.multi_config_sha,
        barcode_index           = _MATRIX_COMPUTER.barcode_index,
    )

    # this was getting disabled for legacy count runs, but some of the multiplexing PD stuff expects tags to be called in legacy count.
    # TODO(Peter Edge) we probably want to get rid of tag-calling functionality outside of multi
    call _ASSIGN_TAGS(
        filtered_barcodes     = FILTER_BARCODES.filtered_barcodes,
        filtered_feature_counts_matrix = FILTER_BARCODES.filtered_matrices_h5,
        molecule_info         = _CELLS_REPORTER.molecule_info,
        multi_graph           = self.multi_graph,
        force_sample_barcodes = self.force_sample_barcodes,
        gem_well              = self.gem_well,
    )

    # _ASSIGN_TAGS is deliberately left without
    # `using (disabled = self.disable_multi)`: it must also run for legacy
    # count, as explained in the note above the call.

    # stages/pipelines below here are for multiplexing sliced outputs
    # sample_barcodes_json is taken from _ASSIGN_TAGS.assign_tags_outs
    # and (per the original note) was either calculated from tags or is equal
    # to self.force_sample_barcodes
    # in multi-gem world a couple of these things (BAM writing, metrics) should be completely migrated to MERGE_GEM_WELLS_AND_SLICE_CELLS
    # but without multi-gem there isn't explicitly a need to run that stage and it does unnecessary things like running Aggr.

    call MULTI_WRITE_PER_SAMPLE_MATRICES(
        matrix_h5            = FILTER_BARCODES.filtered_matrices_h5,
        raw_matrix_h5        = _MATRIX_COMPUTER.raw_gene_bc_matrices_h5,
        sample_barcodes_json = _ASSIGN_TAGS.assign_tags_outs.sample_barcodes_json,
        filtered_barcodes    = FILTER_BARCODES.filtered_barcodes,
    ) using (
        disabled = self.disable_multi,
    )

    # Same stage as the library-level WRITE_POS_BAM call, now with sample
    # barcodes. DISABLE_BAMS.disable_sample_bams is used twice: as the
    # stage's no_bam input and as the call's disabled flag.
    call WRITE_POS_BAM as MULTI_WRITE_PER_SAMPLE_BAM(
        target_set_name      = self.target_set_name,
        read_chunks          = self.chunks,
        sample_barcodes_json = _ASSIGN_TAGS.assign_tags_outs.sample_barcodes_json,
        no_bam               = DISABLE_BAMS.disable_sample_bams,
        *                    = _MATRIX_COMPUTER,
    ) using (
        disabled = DISABLE_BAMS.disable_sample_bams,
    )

    # Re-runs COLLATE_METRICS on the uncollated metric shards, this time
    # with sample barcodes, to obtain per-sample metrics.
    call COLLATE_METRICS as MULTI_COLLATE_PER_SAMPLE_METRICS(
        per_barcode_metrics  = _MATRIX_COMPUTER.per_barcode_metrics_shard,
        reference_path       = self.reference_path,
        feature_reference    = _MATRIX_COMPUTER.slfe_feature_reference,
        sample_barcodes_json = _ASSIGN_TAGS.assign_tags_outs.sample_barcodes_json,
    ) using (
        disabled = self.disable_multi,
    )

    # Per-sample molecule info: WRITE_MOLECULE_INFO with sample barcodes and
    # the per-sample metrics from the call above.
    call WRITE_MOLECULE_INFO as MULTI_WRITE_PER_SAMPLE_MOLECULE_INFO(
        gem_well                = self.gem_well,
        counts_bc_order         = _MATRIX_COMPUTER.report_mol_inputs,
        reference_path          = self.reference_path,
        read_chunks             = self.chunks,
        feature_reference       = _MATRIX_COMPUTER.slfe_feature_reference,
        target_panel_summary    = self.target_panel_summary,
        target_set_name         = self.target_set_name,
        matrix_computer_summary = _MATRIX_COMPUTER.summary,
        recovered_cells         = self.cell_calling_config.recovered_cells,
        force_cells             = self.cell_calling_config.force_cells,
        filtered_barcodes       = FILTER_BARCODES.filtered_barcodes,
        include_introns         = self.include_introns,
        multi_config_sha        = self.multi_config_sha,
        sample_barcodes_json    = _ASSIGN_TAGS.assign_tags_outs.sample_barcodes_json,
        per_sample_metrics      = MULTI_COLLATE_PER_SAMPLE_METRICS.multi_metrics,
        barcode_index           = _MATRIX_COMPUTER.barcode_index,
    ) using (
        disabled = self.disable_multi,
    )

    # The pipeline-level summary is the cells reporter summary combined with
    # the tag-calling metrics.
    call MERGE_METRICS(
        summaries = [
            _CELLS_REPORTER.summary,
            _ASSIGN_TAGS.assign_tags_outs.tag_call_metrics_json,
        ],
    )

    # read_chunks, target_set_name and barcode_whitelist are pass-throughs
    # of pipeline inputs (read_chunks forwards `chunks`); gem_groups is the
    # one-element list [gem_well].
    return (
        filtered_barcodes             = FILTER_BARCODES.filtered_barcodes,
        nonambient_cell_calls         = FILTER_BARCODES.nonambient_calls,
        barcode_correction_csv        = _MATRIX_COMPUTER.barcode_correction_csv,
        possorted_genome_bam          = WRITE_POS_BAM.pos_sorted_bam.bam_file,
        possorted_genome_bam_index    = WRITE_POS_BAM.pos_sorted_bam.bam_index_file,
        summary                       = MERGE_METRICS.summary,
        barcode_summary               = _MATRIX_COMPUTER.barcode_summary,
        molecule_info                 = _CELLS_REPORTER.molecule_info,
        raw_gene_bc_matrices_h5       = _MATRIX_COMPUTER.raw_gene_bc_matrices_h5,
        raw_gene_bc_matrices_mex      = _MATRIX_COMPUTER.raw_gene_bc_matrices_mex,
        filtered_gene_bc_matrices_h5  = FILTER_BARCODES.filtered_matrices_h5,
        filtered_gene_bc_matrices_mex = FILTER_BARCODES.filtered_matrices_mex,
        gem_groups                    = [self.gem_well],
        read_shards                   = _MATRIX_COMPUTER.read_shards,
        annotation_files              = _MATRIX_COMPUTER.annotation_files,
        sequencing_metrics            = _MATRIX_COMPUTER.sequencing_metrics,
        # sliced outputs for multi
        assign_tags                   = _ASSIGN_TAGS.assign_tags_outs,
        multi_pos_sorted_bam          = MULTI_WRITE_PER_SAMPLE_BAM.multi_pos_sorted_bam,
        multi_molecule_info           = MULTI_WRITE_PER_SAMPLE_MOLECULE_INFO.multi_mol_info,
        multi_metrics                 = MULTI_COLLATE_PER_SAMPLE_METRICS.multi_metrics,
        multi_matrices                = MULTI_WRITE_PER_SAMPLE_MATRICES.sample_matrices,
        sample_assignment_metrics     = _ASSIGN_TAGS.assign_tags_outs.sample_assignment_metrics,
        sample_barcodes_json          = _ASSIGN_TAGS.assign_tags_outs.sample_barcodes_json,
        # everything below here is needed only for gem well merging
        bc_umi_info                   = _MATRIX_COMPUTER.bc_umi_info,
        per_barcode_metrics           = _MATRIX_COMPUTER.per_barcode_metrics,
        per_barcode_metrics_shard     = _MATRIX_COMPUTER.per_barcode_metrics_shard,
        bam_header                    = _MATRIX_COMPUTER.bam_header,
        alignments                    = _MATRIX_COMPUTER.alignments,
        read_chunks                   = self.chunks,
        target_set_name               = self.target_set_name,
        barcode_whitelist             = self.barcode_whitelist,
        slfe_feature_reference        = _MATRIX_COMPUTER.slfe_feature_reference,
        gem_well_alignment_metrics    = _MATRIX_COMPUTER.gem_well_alignment_metrics,
        merged_subsampling_metrics    = _CELLS_REPORTER.merged_subsampling_metrics,
    )
}

#
# @include "_common_cloupe_stages.mro"
#

# Stage declaration only; the implementation is the Python stage code at
# ../rna/stages/cloupe/cloupe_preprocess.
# Declared with an empty `split ( )`: the stage uses the split/join
# interface but declares no chunk-level parameters.
# gem_group_index_json is both an input and an output of this stage.
stage CLOUPE_PREPROCESS(
    in  string pipestance_type,
    in  string sample_id,
    in  string sample_desc,
    in  path   analysis,
    in  h5     filtered_gene_bc_matrices_h5,
    in  json   metrics_json,
    in  csv    aggregation_csv,
    in  json   gem_group_index_json,
    in  path[] tissue_image_paths,
    in  int    dark_images,
    in  csv    tissue_positions_list,
    in  txt    fiducial_positions_list,
    in  json   dzi_info,
    in  path[] dzi_tiles_paths,
    in  json   scale_factors_json,
    in  bool   no_secondary_analysis,
    in  string barcode_whitelist,
    in  json   loupe_map,
    in  string product_type,
    in  json   cells_per_tag,
    in  json   cells_per_protospacer,
    out cloupe output_for_cloupe,
    out json   gem_group_index_json,
    src py     "../rna/stages/cloupe/cloupe_preprocess",
) split (
)

#
# @include "_sc_crispr_analyzer_stages.mro"
#

# Stage declaration only; implemented by the Python stage code at
# ../rna/stages/feature/call_protospacers. Not a split stage; requests
# 16 GB of memory (mem_gb = 16).
# Within this file it is called by _CRISPR_ANALYZER, which forwards
# protospacer_call_metrics_json as its crispr_analysis_metrics output.
stage CALL_PROTOSPACERS(
    in  csv  filtered_barcodes,
    in  h5   filtered_feature_counts_matrix,
    in  json counter_metrics_json,
    out csv  protospacer_calls_summary,
    out csv  protospacer_calls_per_cell,
    out json protospacer_call_metrics_json,
    out json cells_per_protospacer,
    out json protospacer_umi_thresholds_json,
    out csv  protospacer_umi_thresholds_csv,
    src py   "../rna/stages/feature/call_protospacers",
) using (
    mem_gb = 16,
)

# Stage declaration only; implemented by the Python stage code at
# ../rna/stages/feature/measure_perturbations. Declared with an empty
# `split ( )`, i.e. split/join interface without chunk-level parameters.
# _CRISPR_ANALYZER calls it twice, once with by_feature = true and once
# with by_feature = false (ignore_multiples = false in both calls).
stage MEASURE_PERTURBATIONS(
    in  csv  protospacer_calls_per_cell,
    in  h5   filtered_feature_counts_matrix,
    in  csv  feature_reference,
    in  bool by_feature,
    in  bool ignore_multiples,
    out csv  perturbation_efficiencies,
    out path perturbation_effects_path,
    src py   "../rna/stages/feature/measure_perturbations",
) split (
)

# Stage declaration only; implemented by the Python stage code at
# ../rna/stages/feature/summarize_crispr_analysis. Requests 4 GB of memory.
# Takes the CALL_PROTOSPACERS results plus the by-feature and by-target
# MEASURE_PERTURBATIONS results and emits a single crispr_analysis path.
#
# NOTE: `perturbations_efficiencies_by_target` is spelled with an extra "s"
# compared with `perturbation_efficiencies_by_feature`. The name is part of
# the stage interface (callers bind to it by name), so it must not be
# "corrected" here without updating the stage code and every caller.
stage SUMMARIZE_CRISPR_ANALYSIS(
    in  csv  feature_reference,
    in  csv  protospacer_calls_summary,
    in  csv  protospacer_calls_per_cell,
    in  json cells_per_protospacer,
    in  csv  protospacer_umi_thresholds_csv,
    in  json protospacer_umi_thresholds_json,
    in  csv  perturbation_efficiencies_by_feature,
    in  csv  perturbations_efficiencies_by_target,
    in  path perturbation_effects_by_feature,
    in  path perturbation_effects_by_target,
    out path crispr_analysis,
    src py   "../rna/stages/feature/summarize_crispr_analysis",
) using (
    mem_gb = 4,
)

#
# @include "_crispr_analyzer.mro"
#

# CRISPR analysis pipeline: CALL_PROTOSPACERS, then MEASURE_PERTURBATIONS
# twice (by feature and by target), then SUMMARIZE_CRISPR_ANALYSIS.
#
# Outputs:
#   cells_per_protospacer   - CALL_PROTOSPACERS.cells_per_protospacer
#   crispr_analysis_metrics - CALL_PROTOSPACERS.protospacer_call_metrics_json
#   crispr_analysis         - SUMMARIZE_CRISPR_ANALYSIS.crispr_analysis
pipeline _CRISPR_ANALYZER(
    in  h5   filtered_feature_counts_matrix,
    in  csv  filtered_barcodes,
    in  csv  feature_reference,
    in  json counter_metrics_json,
    out json cells_per_protospacer,
    out json crispr_analysis_metrics,
    out path crispr_analysis,
)
{
    # Its per-cell calls feed both MEASURE_PERTURBATIONS calls; its summary,
    # thresholds and cells_per_protospacer feed SUMMARIZE_CRISPR_ANALYSIS.
    call CALL_PROTOSPACERS(
        filtered_barcodes    = self.filtered_barcodes,
        filtered_feature_counts_matrix = self.filtered_feature_counts_matrix,
        counter_metrics_json = self.counter_metrics_json,
    )

    # The two MEASURE_PERTURBATIONS calls are identical except for
    # by_feature (true here, false in _PERTURBATIONS_BY_TARGET).
    call MEASURE_PERTURBATIONS as _PERTURBATIONS_BY_FEATURE(
        protospacer_calls_per_cell = CALL_PROTOSPACERS.protospacer_calls_per_cell,
        filtered_feature_counts_matrix = self.filtered_feature_counts_matrix,
        feature_reference          = self.feature_reference,
        by_feature                 = true,
        ignore_multiples           = false,
    )

    call MEASURE_PERTURBATIONS as _PERTURBATIONS_BY_TARGET(
        protospacer_calls_per_cell = CALL_PROTOSPACERS.protospacer_calls_per_cell,
        filtered_feature_counts_matrix = self.filtered_feature_counts_matrix,
        feature_reference          = self.feature_reference,
        by_feature                 = false,
        ignore_multiples           = false,
    )

    # `perturbations_efficiencies_by_target` (extra "s") is the parameter
    # name as declared by the SUMMARIZE_CRISPR_ANALYSIS stage.
    call SUMMARIZE_CRISPR_ANALYSIS(
        feature_reference          = self.feature_reference,
        protospacer_calls_summary  = CALL_PROTOSPACERS.protospacer_calls_summary,
        protospacer_calls_per_cell = CALL_PROTOSPACERS.protospacer_calls_per_cell,
        cells_per_protospacer      = CALL_PROTOSPACERS.cells_per_protospacer,
        protospacer_umi_thresholds_csv = CALL_PROTOSPACERS.protospacer_umi_thresholds_csv,
        protospacer_umi_thresholds_json = CALL_PROTOSPACERS.protospacer_umi_thresholds_json,
        perturbation_efficiencies_by_feature = _PERTURBATIONS_BY_FEATURE.perturbation_efficiencies,
        perturbations_efficiencies_by_target = _PERTURBATIONS_BY_TARGET.perturbation_efficiencies,
        perturbation_effects_by_feature = _PERTURBATIONS_BY_FEATURE.perturbation_effects_path,
        perturbation_effects_by_target = _PERTURBATIONS_BY_TARGET.perturbation_effects_path,
    )

    return (
        cells_per_protospacer   = CALL_PROTOSPACERS.cells_per_protospacer,
        crispr_analysis_metrics = CALL_PROTOSPACERS.protospacer_call_metrics_json,
        crispr_analysis         = SUMMARIZE_CRISPR_ANALYSIS.crispr_analysis,
    )
}

#
# @include "_sc_rna_counter_stages.mro"
#

# Stage declaration only; implemented by the Python stage code at
# ../rna/stages/counter/summarize_reports.
# Takes a list of summary JSONs plus sample/reference/matrix inputs and
# emits the metrics summary (JSON and CSV), the web summary HTML, a feature
# reference CSV and ws_data. feature_reference is both an input and an
# output.
#
# Resources/storage: requests 6 GB of memory; `volatile = strict` opts the
# stage into Martian's strict volatile-data cleanup, while `retain` exempts
# metrics_summary_json from that cleanup (see Martian storage docs).
stage SUMMARIZE_REPORTS(
    in  json[] summaries,
    in  string sample_id,
    in  string sample_desc,
    in  path   reference_path,
    in  path   analysis,
    in  h5     barcode_summary_h5,
    in  h5     filtered_gene_bc_matrices_h5,
    in  csv    filtered_barcodes,
    in  string barcode_whitelist,
    in  int[]  gem_groups,
    in  csv    feature_reference,
    in  string target_set_name,
    in  csv    per_feature_metrics_csv,
    in  bool   include_introns,
    out json   metrics_summary_json,
    out csv    metrics_summary_csv,
    out html   web_summary,
    out csv    feature_reference,
    out json   ws_data,
    src py     "../rna/stages/counter/summarize_reports",
) using (
    mem_gb   = 6,
    volatile = strict,
) retain (
    metrics_summary_json,
)

#
# @include "_sc_vdj_assembler_stages.mro"
#

# Stage declaration for the V(D)J preflight step, implemented in Python at
# "../rna/stages/vdj/vdj_preflight".
#
# It receives the sample definitions, the V(D)J reference path, the denovo and
# full_check flags, force_cells, the inner enrichment primers path and the
# chain type. No `out` parameters are declared, so the stage passes nothing
# downstream.
# NOTE(review): presumably it exists only to validate its inputs and fail
# early - confirm against the Python stage code.
stage VDJ_PREFLIGHT(
    in  map[]  sample_def,
    in  path   vdj_reference_path,
    in  bool   denovo,
    in  bool   full_check,
    in  int    force_cells,
    in  path   inner_enrichment_primers,
    in  string chain_type,
    src py     "../rna/stages/vdj/vdj_preflight",
)

# Stage declaration for the contig reporter, implemented in Python at
# "../rna/stages/vdj/report_contigs".
#
# Takes the V(D)J reference path, a cell-barcodes JSON, the contig FASTA, an
# annotations JSON, a filter summary CSV and two TSV summaries (contig and
# UMI), and produces a single JSON `summary`.
#
# The empty `split ( )` clause declares this as a split stage that adds no
# chunk-level parameters of its own.
stage REPORT_CONTIGS(
    in  path  vdj_reference_path,
    in  json  cell_barcodes,
    in  fasta contigs,
    in  json  annotations,
    in  csv   filter_summary,
    in  tsv   contig_summary,
    in  tsv   umi_summary,
    out json  summary,
    src py    "../rna/stages/vdj/report_contigs",
) split (
)

# Stage declaration for the V(D)J report summarizer, implemented in Python at
# "../rna/stages/vdj/summarize_reports".
#
# Inputs: the sample id and description, the barcode_whitelist string, an
# array of JSON summaries, the cell-barcodes JSON, the clonotype summary CSV,
# the barcode support CSV and the receptor string.
# Outputs: the receptor string, the metrics summary (JSON and CSV), an HTML
# web summary and a JSON holding the web summary data.
#
# receptor is declared both as an input and as an output.
# The empty `split ( )` clause declares this as a split stage that adds no
# chunk-level parameters. metrics_summary_json is listed under `retain`, which
# in Martian excludes it from volatile data removal.
stage SUMMARIZE_VDJ_REPORTS(
    in  string sample_id,
    in  string sample_desc,
    in  string barcode_whitelist,
    in  json[] summaries,
    in  json   cell_barcodes,
    in  csv    clonotype_summary,
    in  csv    barcode_support,
    in  string receptor,
    out string receptor,
    out json   metrics_summary_json,
    out csv    metrics_summary_csv,
    out html   web_summary,
    out json   web_summary_data,
    src py     "../rna/stages/vdj/summarize_reports",
) split (
) retain (
    metrics_summary_json,
)

#
# @include "_sc_vdj_clonotype_assigner.mro"
#

# Pipeline that runs enclone on a set of contig annotations and fans its
# result out into the clonotype-related output files.
#
# Inputs: the V(D)J reference path, the contig annotations JSON and the
# receptor string.
# Data flow:
#   * RUN_ENCLONE consumes all three pipeline inputs; its enclone_output feeds
#     every stage below except WRITE_ANN_CSV and CREATE_AIRR_TSV.
#   * FILL_CLONOTYPE_INFO combines the input annotations with enclone_output;
#     its all_contig_annotations_json is what the writer stages read and what
#     is returned as contig_annotations_json.
#   * The remaining stages each produce a group of output files (concat-ref
#     FASTA/BAM, consensus BAM, consensus FASTA/CSV, annotation CSVs, AIRR
#     TSV, clonotypes CSV).
# summary, enclone_output, disable_vloupe and donor_ref_fa are returned
# straight from RUN_ENCLONE.
pipeline CLONOTYPE_ASSIGNER(
    in  path      vdj_reference_path,
    in  json      contig_annotations,
    in  string    receptor,
    out json      contig_annotations_json,
    out csv       all_contig_annotations_csv,
    out csv       filtered_contig_annotations_csv,
    out csv       clonotypes_csv,
    out fasta     consensus_fasta,
    out fasta.fai consensus_fasta_fai,
    out fasta     concat_ref_fasta,
    out fasta.fai concat_ref_fasta_fai,
    out bam       concat_ref_bam,
    out bam.bai   concat_ref_bam_bai,
    out bam       consensus_bam,
    out bam.bai   consensus_bam_bai,
    out csv       consensus_annotations_csv,
    out json      summary,
    out tsv       airr_rearrangement,
    out pb        enclone_output,
    out bool      disable_vloupe,
    out fa        donor_ref_fa,
)
{
    # Entry point: the only call that reads the reference path directly.
    call RUN_ENCLONE(
        vdj_reference_path = self.vdj_reference_path,
        contig_annotations = self.contig_annotations,
        receptor           = self.receptor,
    )

    # Produces the annotations JSON used by all downstream writer stages.
    call FILL_CLONOTYPE_INFO(
        contig_annotations = self.contig_annotations,
        enclone_output     = RUN_ENCLONE.enclone_output,
    )

    call WRITE_CONCAT_REF_OUTS(
        all_contig_annotations_json = FILL_CLONOTYPE_INFO.all_contig_annotations_json,
        enclone_output              = RUN_ENCLONE.enclone_output,
    )

    call WRITE_CONSENSUS_BAM(
        all_contig_annotations_json = FILL_CLONOTYPE_INFO.all_contig_annotations_json,
        enclone_output              = RUN_ENCLONE.enclone_output,
    )

    call WRITE_CONSENSUS_TXT(
        enclone_output = RUN_ENCLONE.enclone_output,
    )

    call WRITE_ANN_CSV(
        all_contig_annotations_json = FILL_CLONOTYPE_INFO.all_contig_annotations_json,
    )

    # Depends on WRITE_CONCAT_REF_OUTS for the concatenated reference FASTA.
    call CREATE_AIRR_TSV(
        contig_annotations = FILL_CLONOTYPE_INFO.all_contig_annotations_json,
        concat_ref_fasta   = WRITE_CONCAT_REF_OUTS.concat_ref_fasta,
    )

    call WRITE_CLONOTYPE_OUTS(
        enclone_output = RUN_ENCLONE.enclone_output,
        receptor       = self.receptor,
    )

    return (
        contig_annotations_json    = FILL_CLONOTYPE_INFO.all_contig_annotations_json,
        all_contig_annotations_csv = WRITE_ANN_CSV.all_contig_annotations_csv,
        filtered_contig_annotations_csv = WRITE_ANN_CSV.filtered_contig_annotations_csv,
        summary                    = RUN_ENCLONE.summary,
        clonotypes_csv             = WRITE_CLONOTYPE_OUTS.clonotypes_csv,
        consensus_annotations_csv  = WRITE_CONSENSUS_TXT.consensus_annotations_csv,
        consensus_fasta            = WRITE_CONSENSUS_TXT.consensus_fasta,
        consensus_fasta_fai        = WRITE_CONSENSUS_TXT.consensus_fasta_fai,
        concat_ref_fasta           = WRITE_CONCAT_REF_OUTS.concat_ref_fasta,
        concat_ref_fasta_fai       = WRITE_CONCAT_REF_OUTS.concat_ref_fasta_fai,
        consensus_bam              = WRITE_CONSENSUS_BAM.consensus_bam,
        consensus_bam_bai          = WRITE_CONSENSUS_BAM.consensus_bam_bai,
        concat_ref_bam             = WRITE_CONCAT_REF_OUTS.concat_ref_bam,
        concat_ref_bam_bai         = WRITE_CONCAT_REF_OUTS.concat_ref_bam_bai,
        airr_rearrangement         = CREATE_AIRR_TSV.airr_annotations,
        enclone_output             = RUN_ENCLONE.enclone_output,
        disable_vloupe             = RUN_ENCLONE.disable_vloupe,
        donor_ref_fa               = RUN_ENCLONE.donor_ref_fa,
    )
}

# Wrapper around CLONOTYPE_ASSIGNER that copes with runs lacking a V(D)J
# reference.
#
# CLONOTYPE_ASSIGNER is called with the `disabled` modifier bound to
# has_no_vdj_ref, so it is skipped when that flag is true. HANDLE_NO_VDJ_REF
# then receives both the original (assembler) annotations and the annotations
# from CLONOTYPE_ASSIGNER, together with the flag, and its
# final_contig_annotations becomes this pipeline's all_contig_annotations_json.
# Every other output is filled by name from CLONOTYPE_ASSIGNER through the
# wildcard binding in the return statement.
# NOTE(review): when CLONOTYPE_ASSIGNER is disabled, those wildcard-bound
# outputs are presumably null - confirm that consumers tolerate this.
pipeline SC_VDJ_CLONOTYPE_ASSIGNER(
    in  path      vdj_reference_path,
    in  json      contig_annotations,
    in  string    receptor,
    in  bool      has_no_vdj_ref,
    out json      all_contig_annotations_json,
    out csv       all_contig_annotations_csv,
    out csv       filtered_contig_annotations_csv,
    out csv       clonotypes_csv,
    out fasta     consensus_fasta,
    out fasta.fai consensus_fasta_fai,
    out fasta     concat_ref_fasta,
    out fasta.fai concat_ref_fasta_fai,
    out bam       concat_ref_bam,
    out bam.bai   concat_ref_bam_bai,
    out bam       consensus_bam,
    out bam.bai   consensus_bam_bai,
    out csv       consensus_annotations_csv,
    out json      summary,
    out tsv       airr_rearrangement,
    out pb        enclone_output,
    out bool      disable_vloupe,
    out fa        donor_ref_fa,
)
{
    # Skipped entirely when no V(D)J reference is available.
    call CLONOTYPE_ASSIGNER(
        vdj_reference_path = self.vdj_reference_path,
        contig_annotations = self.contig_annotations,
        receptor           = self.receptor,
    ) using (
        disabled = self.has_no_vdj_ref,
    )

    # Always runs; it is handed both candidate annotation files and the flag.
    call HANDLE_NO_VDJ_REF(
        asm_contig_json       = self.contig_annotations,
        clonotype_contig_json = CLONOTYPE_ASSIGNER.contig_annotations_json,
        has_no_vdj_ref        = self.has_no_vdj_ref,
    )

    return (
        all_contig_annotations_json = HANDLE_NO_VDJ_REF.final_contig_annotations,
        *                           = CLONOTYPE_ASSIGNER,
    )
}

#
# @include "_sc_vdj_contig_assembler.mro"
#

# Pipeline that takes read chunks for one gem well through sharding, barcode
# correction and V(D)J contig assembly.
#
# Call order and dependencies:
#   1. MAKE_SHARD reads the input chunks; the reference/feature/target inputs
#      are bound to null and libraries_to_translate to an empty array.
#   2. BARCODE_CORRECTION consumes MAKE_SHARD's barcode counts and invalid
#      reads.
#   3. RUST_BRIDGE consumes the valid reads from MAKE_SHARD and the corrected
#      reads and counts from BARCODE_CORRECTION.
#   4. ASSEMBLE_VDJ consumes RUST_BRIDGE's sorted reads and statistics plus
#      the assembly-related pipeline inputs.
#   5. HANDLE_GEX_CELLS combines ASSEMBLE_VDJ's contig annotations with the
#      gene-expression filtered barcodes and the two is_* flags; its result,
#      not ASSEMBLE_VDJ's, is returned as contig_annotations.
#   6. MERGE_METRICS merges the summaries of MAKE_SHARD, BARCODE_CORRECTION
#      and ASSEMBLE_VDJ into the returned `summary`.
# read_shards is assembled in the return statement as a struct literal from
# BARCODE_CORRECTION and MAKE_SHARD outputs.
pipeline SC_VDJ_CONTIG_ASSEMBLER(
    in  int          gem_well,
    in  ChemistryDef chemistry_def,
    in  map[]        chunks,
    in  int          r1_length,
    in  int          r2_length,
    in  int          initial_reads,
    in  float        subsample_rate,
    in  path         vdj_reference_folder,
    in  bool         denovo,
    in  int          force_cells,
    in  path         inner_primers,
    in  string       receptor,
    in  csv          gex_filtered_barcodes,
    in  bool         is_antibody_only,
    in  bool         is_non_targeted_gex,
    out json         summary,
    out ReadShards   read_shards,
    out int[]        gem_groups,
    out json         raw_barcode_counts,
    out json         corrected_barcode_counts,
    out int          n50_n50_rpu,
    out int          processed_read_pairs,
    out bam          contig_bam,
    out bam.bai      contig_bam_bai,
    out tsv          summary_tsv,
    out tsv          umi_summary_tsv,
    out json         contig_annotations,
    out csv          barcode_support,
    out json[]       barcodes_in_chunks,
    out fastq        unmapped_sample_fastq,
    out txt          report,
    out int          total_read_pairs,
    out arp.bincode  assemblable_reads_per_bc,
    out smf.json     sequencing_metrics,
)
{
    # Reference, target and feature inputs are explicitly null here, and no
    # libraries are translated.
    call MAKE_SHARD(
        gem_well               = self.gem_well,
        chemistry_def          = self.chemistry_def,
        read_chunks            = self.chunks,
        r1_length              = self.r1_length,
        r2_length              = self.r2_length,
        subsample_rate         = self.subsample_rate,
        initial_read_pairs     = self.initial_reads,
        libraries_to_translate = [],
        reference_path         = null,
        target_features        = null,
        target_set_name        = null,
        feature_reference_path = null,
    )

    call BARCODE_CORRECTION(
        gem_well               = self.gem_well,
        barcode_counts         = MAKE_SHARD.barcode_counts,
        barcode_segment_counts = MAKE_SHARD.barcode_segment_counts,
        chemistry_def          = self.chemistry_def,
        invalid_uncorrected    = MAKE_SHARD.invalid,
        valid_read_metrics     = MAKE_SHARD.bc_correct_summary,
        libraries_to_translate = [],
    )

    call RUST_BRIDGE(
        gem_well                 = self.gem_well,
        valid_uncorrected        = MAKE_SHARD.valid,
        valid_corrected          = BARCODE_CORRECTION.valid_corrected,
        raw_barcode_counts       = MAKE_SHARD.barcode_counts,
        corrected_barcode_counts = BARCODE_CORRECTION.corrected_barcode_counts,
        paired_end               = MAKE_SHARD.paired_end,
    )

    call ASSEMBLE_VDJ(
        bc_sorted_rna_reads      = RUST_BRIDGE.bc_sorted_rna_reads,
        paired_end               = MAKE_SHARD.paired_end,
        vdj_reference_path       = self.vdj_reference_folder,
        n50_n50_rpu              = RUST_BRIDGE.n50_n50_rpu,
        npairs                   = RUST_BRIDGE.processed_read_pairs,
        receptor                 = self.receptor,
        denovo                   = self.denovo,
        force_cells              = self.force_cells,
        inner_enrichment_primers = self.inner_primers,
        total_read_pairs         = MAKE_SHARD.total_read_pairs,
        corrected_bc_counts      = RUST_BRIDGE.corrected_barcode_counts_json,
    )

    # Post-processes the assembler's annotations using gene-expression data.
    call HANDLE_GEX_CELLS(
        asm_contig_annotations = ASSEMBLE_VDJ.contig_annotations,
        filtered_barcodes      = self.gex_filtered_barcodes,
        is_antibody_only       = self.is_antibody_only,
        is_non_targeted_gex    = self.is_non_targeted_gex,
    )

    call MERGE_METRICS(
        summaries = [
            MAKE_SHARD.summary,
            BARCODE_CORRECTION.summary,
            ASSEMBLE_VDJ.metrics_summary_json,
        ],
    )

    return (
        summary                  = MERGE_METRICS.summary,
        read_shards              = {
            corrected_reads: BARCODE_CORRECTION.valid_corrected,
            invalid_reads:   BARCODE_CORRECTION.invalid,
            valid_reads:     MAKE_SHARD.valid,
        },
        gem_groups               = RUST_BRIDGE.gem_groups,
        raw_barcode_counts       = RUST_BRIDGE.raw_barcode_counts_json,
        corrected_barcode_counts = RUST_BRIDGE.corrected_barcode_counts_json,
        n50_n50_rpu              = RUST_BRIDGE.n50_n50_rpu,
        processed_read_pairs     = RUST_BRIDGE.processed_read_pairs,
        contig_bam               = ASSEMBLE_VDJ.contig_bam,
        contig_bam_bai           = ASSEMBLE_VDJ.contig_bam_bai,
        summary_tsv              = ASSEMBLE_VDJ.summary_tsv,
        umi_summary_tsv          = ASSEMBLE_VDJ.umi_summary_tsv,
        contig_annotations       = HANDLE_GEX_CELLS.contig_annotations,
        barcode_support          = ASSEMBLE_VDJ.barcode_support,
        barcodes_in_chunks       = ASSEMBLE_VDJ.barcodes_in_chunks,
        unmapped_sample_fastq    = ASSEMBLE_VDJ.unmapped_sample_fastq,
        report                   = ASSEMBLE_VDJ.report,
        total_read_pairs         = MAKE_SHARD.total_read_pairs,
        assemblable_reads_per_bc = ASSEMBLE_VDJ.assemblable_reads_per_bc,
        sequencing_metrics       = MAKE_SHARD.sequencing_metrics,
    )
}

#
# @include "_sc_rna_targeted_analyzer_stages.mro"
#

# Stage declaration for the targeted-analysis summarizer, implemented in
# Python at "../rna/stages/targeted/summarize_targeted_analysis".
# Takes an array of JSON summaries and produces one JSON `summary`.
stage SUMMARIZE_TARGETED_ANALYSIS(
    in  json[] summaries,
    out json   summary,
    src py     "../rna/stages/targeted/summarize_targeted_analysis",
)

# Stage declaration for the targeted-metrics calculation, implemented in
# Python at "../rna/stages/targeted/calculate_targeted_metrics".
#
# Takes the molecule info and filtered matrix h5 files plus two JSON summaries
# (basic counter and targeted), and produces a JSON `summary` and a
# per-feature metrics CSV.
# The empty `split ( )` clause declares this as a split stage that adds no
# chunk-level parameters.
stage CALCULATE_TARGETED_METRICS(
    in  h5   molecule_info,
    in  h5   filtered_gene_bc_matrices,
    in  json basic_counter_summary,
    in  json targeted_summary,
    out json summary,
    out csv  per_feature_metrics_csv,
    src py   "../rna/stages/targeted/calculate_targeted_metrics",
) split (
)

#
# @include "_targeted_analyzer.mro"
#

# Pipeline that computes targeted metrics and two read-subsampling summaries
# and merges them into one metrics JSON.
#
# CALCULATE_TARGETED_METRICS and the two SUBSAMPLE_READS calls depend only on
# pipeline inputs. SUBSAMPLE_READS is invoked twice under different aliases,
# once with target_mode "ontarget" and once with "offtarget".
# SUMMARIZE_TARGETED_ANALYSIS merges the three resulting summaries into
# targeted_analysis_metrics; per_feature_metrics_csv is returned directly from
# CALCULATE_TARGETED_METRICS.
pipeline _TARGETED_ANALYZER(
    in  h5   molecule_info,
    in  h5   filtered_gene_bc_matrices,
    in  csv  filtered_barcodes,
    in  json basic_counter_summary,
    in  json targeted_summary,
    out json targeted_analysis_metrics,
    out csv  per_feature_metrics_csv,
)
{
    call CALCULATE_TARGETED_METRICS(
        molecule_info             = self.molecule_info,
        filtered_gene_bc_matrices = self.filtered_gene_bc_matrices,
        basic_counter_summary     = self.basic_counter_summary,
        targeted_summary          = self.targeted_summary,
    )

    # Same stage as the next call; only target_mode differs.
    call SUBSAMPLE_READS as SUBSAMPLE_ON_TARGET_READS(
        molecule_info     = self.molecule_info,
        filtered_barcodes = self.filtered_barcodes,
        target_mode       = "ontarget",
    )

    call SUBSAMPLE_READS as SUBSAMPLE_OFF_TARGET_READS(
        molecule_info     = self.molecule_info,
        filtered_barcodes = self.filtered_barcodes,
        target_mode       = "offtarget",
    )

    call SUMMARIZE_TARGETED_ANALYSIS(
        summaries = [
            CALCULATE_TARGETED_METRICS.summary,
            SUBSAMPLE_ON_TARGET_READS.summary,
            SUBSAMPLE_OFF_TARGET_READS.summary,
        ],
    )

    return (
        targeted_analysis_metrics = SUMMARIZE_TARGETED_ANALYSIS.summary,
        per_feature_metrics_csv   = CALCULATE_TARGETED_METRICS.per_feature_metrics_csv,
    )
}

#
# @include "_vloupe_stages.mro"
#

# Stage declaration for the vloupe preprocessing step, implemented in Python
# at "../rna/stages/vloupe/vloupe_preprocess".
#
# Takes the pipestance type, the sample id and description, the enclone output
# (pb) and the disable_vloupe flag, and produces a single vloupe file.
# The stage requests 15 GB of memory.
stage VLOUPE_PREPROCESS(
    in  string pipestance_type,
    in  string sample_id,
    in  string sample_desc,
    in  pb     enclone_output,
    in  bool   disable_vloupe,
    out vloupe output_for_vloupe,
    src py     "../rna/stages/vloupe/vloupe_preprocess",
) using (
    mem_gb = 15,
)

#
# @include "_sc_multi_defs.mro"
#

# Stage declaration for the V(D)J configuration step, implemented in Python at
# "../rna/stages/common/make_vdj_config".
#
# Takes the T and B V(D)J input structs, the overall disable_vdj flag and the
# V(D)J reference path, and emits three booleans: disable_vdj_b,
# disable_vdj_t and has_no_vdj_ref.
# NOTE(review): how the flags are derived is decided in the Python code and is
# not visible here.
stage _MAKE_VDJ_CONFIG(
    in  VdjInputsCS vdj_t_input,
    in  VdjInputsCS vdj_b_input,
    in  bool        disable_vdj,
    in  path        vdj_reference_path,
    out bool        disable_vdj_b,
    out bool        disable_vdj_t,
    out bool        has_no_vdj_ref,
    src py          "../rna/stages/common/make_vdj_config",
)

# Builds the FullPipelineConfig struct from the basic config and the V(D)J
# flags computed by _MAKE_VDJ_CONFIG.
#
# This is a pipeline, so that martian can determine at compile time that
# config.disable_count in the output is the same as basic_config.disable_count,
# which has several beneficial knock-on effects.
#
# disable_count, disable_multi and disable_multi_count are copied unchanged
# from basic_config; disable_vdj_b, disable_vdj_t and has_no_vdj_ref come from
# the stage. The stage's disable_vdj input is not bound explicitly and is
# therefore supplied by the `* = self.basic_config` wildcard.
pipeline MAKE_FULL_CONFIG(
    in  VdjInputsCS         vdj_t_input,
    in  VdjInputsCS         vdj_b_input,
    in  BasicPipelineConfig basic_config,
    in  path                vdj_reference_path,
    out FullPipelineConfig  config,
)
{
    call _MAKE_VDJ_CONFIG(
        vdj_t_input        = self.vdj_t_input,
        vdj_b_input        = self.vdj_b_input,
        vdj_reference_path = self.vdj_reference_path,
        *                  = self.basic_config,
    )

    return (
        config = {
            disable_count:       self.basic_config.disable_count,
            disable_multi:       self.basic_config.disable_multi,
            disable_multi_count: self.basic_config.disable_multi_count,
            disable_vdj_b:       _MAKE_VDJ_CONFIG.disable_vdj_b,
            disable_vdj_t:       _MAKE_VDJ_CONFIG.disable_vdj_t,
            has_no_vdj_ref:      _MAKE_VDJ_CONFIG.has_no_vdj_ref,
        },
    )
}

# Stage declaration for splitting V(D)J inputs, implemented in Python at
# "../rna/stages/vdj/split_vdj_inputs".
#
# Takes three arrays (VdjInputs structs, chemistry types and receptors) and
# emits one input struct, one chemistry type and one receptor for each of the
# vdj_t_* and vdj_b_* output groups.
# NOTE(review): the rule that assigns array elements to the T or B group lives
# in the Python code - presumably it keys off the receptor; confirm.
stage SPLIT_VDJ_INPUTS(
    in  VdjInputs[] vdj_inputs,
    in  string[]    vdj_chemistry_types,
    in  string[]    vdj_receptors,
    out VdjInputs   vdj_t_input,
    out string      vdj_t_chemistry_type,
    out string      vdj_t_receptor,
    out VdjInputs   vdj_b_input,
    out string      vdj_b_chemistry_type,
    out string      vdj_b_receptor,
    src py          "../rna/stages/vdj/split_vdj_inputs",
)

# Stage declaration for selecting V(D)J outputs, implemented in Python at
# "../rna/stages/vdj/pick_vdj_outs".
#
# Takes the T and B disable flags, a VdjOutputsCS struct and an HTML web
# summary for each of T and B, and emits a single VdjOutputsCS struct and a
# single web summary.
# NOTE(review): presumably the disable flags decide which side is forwarded -
# confirm against the Python stage code.
stage PICK_VDJ_OUTS(
    in  bool         disable_vdj_t,
    in  bool         disable_vdj_b,
    in  VdjOutputsCS vdj_t_outs,
    in  html         vdj_t_web_summary,
    in  VdjOutputsCS vdj_b_outs,
    in  html         vdj_b_web_summary,
    out VdjOutputsCS vdj_outs,
    out html         web_summary,
    src py           "../rna/stages/vdj/pick_vdj_outs",
)

# Stage declaration for merging per-gem-well CSVs, implemented in Python at
# "../rna/stages/multi/merge_gem_well_filtered_barcode_csvs".
#
# Takes two arrays of CSV files (filtered_barcodes and barcode_correction_csv)
# and emits one CSV under each of the same two names.
# The stage is marked `volatile = strict`.
stage MERGE_GEM_WELL_CSVS(
    in  csv[] filtered_barcodes,
    in  csv[] barcode_correction_csv,
    out csv   filtered_barcodes,
    out csv   barcode_correction_csv,
    src py    "../rna/stages/multi/merge_gem_well_filtered_barcode_csvs",
) using (
    volatile = strict,
)

###############################################################################
# Chemistry detector pipelines

# Detects the chemistry and receptor for one V(D)J library set and checks its
# barcodes against the gene-expression library.
#
# DETECT_CHEMISTRY is called with no reference path and a fixed list of
# allowed chemistry names; its remaining inputs are filled from
# vdj_chem_inputs through the wildcard binding. DETECT_VDJ_RECEPTOR uses the
# detected chemistry. CHECK_BARCODES_COMPATIBILITY_VDJ is skipped when
# disable_count is true, and none of its outputs are returned.
# The returned chain_type is the input field vdj_chem_inputs.chain_type passed
# through unchanged.
pipeline VDJ_CHEMISTRY_DETECTOR(
    in  path               vdj_reference_path,
    in  VdjChemistryInputs vdj_chem_inputs,
    in  string             gex_chemistry_name,
    in  map[]              gex_sample_def,
    in  ChemistryDef       gex_custom_chemistry_def,
    in  bool               disable_count,
    out string             chemistry_type,
    out string             receptor,
    out string             chain_type,
)
{
    # Restricted to the chemistry names listed in allowed_chems.
    call DETECT_CHEMISTRY(
        reference_path      = null,
        allowed_chems       = [
            "SCVDJ_auto",
            "custom",
            "SCVDJ",
            "SCVDJ-R2",
            "SCVDJ-R1",
        ],
        chemistry_name_spec = self.vdj_chem_inputs.chemistry,
        *                   = self.vdj_chem_inputs,
    )

    # The input chain_type is forwarded as force_receptor.
    call DETECT_VDJ_RECEPTOR(
        force_receptor     = self.vdj_chem_inputs.chain_type,
        vdj_reference_path = self.vdj_reference_path,
        chemistry          = DETECT_CHEMISTRY.chemistry_type,
        sample_def         = self.vdj_chem_inputs.sample_def,
    )

    # Runs only when gene-expression counting is enabled.
    call CHECK_BARCODES_COMPATIBILITY_VDJ(
        vdj_chemistry_name          = DETECT_CHEMISTRY.chemistry_type,
        vdj_sample_def              = self.vdj_chem_inputs.sample_def,
        gex_chemistry_name          = self.gex_chemistry_name,
        gex_sample_def              = self.gex_sample_def,
        gex_custom_chemistry_def    = self.gex_custom_chemistry_def,
        vdj_custom_chemistry_def    = self.vdj_chem_inputs.custom_chemistry_def,
        enforce_library_concordance = true,
    ) using (
        disabled = self.disable_count,
    )

    return (
        chemistry_type = DETECT_CHEMISTRY.chemistry_type,
        receptor       = DETECT_VDJ_RECEPTOR.receptor,
        chain_type     = self.vdj_chem_inputs.chain_type,
    )
}

# Detect chemistry and check barcodes compatibility for a single gem well, gene expression and vdj
#
# Three calls, each individually switchable:
#   * DETECT_COUNT_CHEMISTRY (an alias of DETECT_CHEMISTRY) and
#     CHECK_BARCODES_COMPATIBILITY are skipped when basic_config.disable_count
#     is true.
#   * VDJ_CHEMISTRY_DETECTOR is map-called over gem_well_inputs.vdj_inputs and
#     skipped when basic_config.disable_vdj is true.
# The outputs expose the complete output structs of the three calls, plus the
# count sample_def passed through unchanged.
pipeline _GEM_WELL_CHEMISTRY_DETECTOR(
    in  GemWellChemistryInputs       gem_well_inputs,
    in  VdjGenInputs                 vdj_gen_inputs,
    in  BasicPipelineConfig          basic_config,
    in  string[]                     count_allowed_chems,
    out map[]                        sample_defs_count,
    out DETECT_CHEMISTRY             detect_count_chem,
    out VDJ_CHEMISTRY_DETECTOR[]     detect_vdj_chem,
    out CHECK_BARCODES_COMPATIBILITY check_barcodes_compatibility,
)
{
    # Inputs not bound explicitly are filled from count_inputs by the wildcard.
    call DETECT_CHEMISTRY as DETECT_COUNT_CHEMISTRY(
        chemistry_name_spec = self.gem_well_inputs.count_inputs.chemistry,
        allowed_chems       = self.count_allowed_chems,
        *                   = self.gem_well_inputs.count_inputs,
    ) using (
        disabled = self.basic_config.disable_count,
    )

    # One VDJ_CHEMISTRY_DETECTOR run per element of vdj_inputs.
    map call VDJ_CHEMISTRY_DETECTOR(
        vdj_chem_inputs          = split self.gem_well_inputs.vdj_inputs,
        vdj_reference_path       = self.vdj_gen_inputs.vdj_reference_path,
        gex_chemistry_name       = DETECT_COUNT_CHEMISTRY.chemistry_type,
        gex_sample_def           = self.gem_well_inputs.count_inputs.sample_def,
        gex_custom_chemistry_def = self.gem_well_inputs.count_inputs.custom_chemistry_def,
        disable_count            = self.basic_config.disable_count,
    ) using (
        disabled = self.basic_config.disable_vdj,
    )

    call CHECK_BARCODES_COMPATIBILITY(
        chemistry_name              = DETECT_COUNT_CHEMISTRY.chemistry_type,
        custom_chemistry_def        = self.gem_well_inputs.count_inputs.custom_chemistry_def,
        sample_def                  = self.gem_well_inputs.count_inputs.sample_def,
        enforce_library_concordance = self.gem_well_inputs.count_inputs.enforce_library_concordance,
    ) using (
        disabled = self.basic_config.disable_count,
    )

    return (
        sample_defs_count            = self.gem_well_inputs.count_inputs.sample_def,
        detect_count_chem            = DETECT_COUNT_CHEMISTRY,
        detect_vdj_chem              = VDJ_CHEMISTRY_DETECTOR,
        check_barcodes_compatibility = CHECK_BARCODES_COMPATIBILITY,
    )
}

# Detect chemistry and check barcodes compatibility for multiple gem wells, gene expression and vdj
#
# _GEM_WELL_CHEMISTRY_DETECTOR is map-called once per element of
# multi_gem_well_inputs; the other three inputs are passed to every run
# unchanged. COMBINE_GEM_WELL_CHEMISTRIES then receives the collected per-well
# results, and all four pipeline outputs are taken from it.
pipeline MULTI_CHEMISTRY_DETECTOR(
    in  GemWellChemistryInputs[] multi_gem_well_inputs,
    in  VdjGenInputs             vdj_gen_inputs,
    in  BasicPipelineConfig      basic_config,
    in  string[]                 count_allowed_chems,
    out map                      library_to_chemistry,
    out string[]                 libraries_to_translate,
    out bool                     is_antibody_only,
    # Marked for later removal.
    out GemWellDetectChemistry   legacy,
)
{
    map call _GEM_WELL_CHEMISTRY_DETECTOR(
        gem_well_inputs     = split self.multi_gem_well_inputs,
        vdj_gen_inputs      = self.vdj_gen_inputs,
        basic_config        = self.basic_config,
        count_allowed_chems = self.count_allowed_chems,
    )

    # Reduces the per-gem-well results to a single set of outputs.
    call COMBINE_GEM_WELL_CHEMISTRIES(
        gem_well_detect_chemistry = _GEM_WELL_CHEMISTRY_DETECTOR,
    )

    return (
        library_to_chemistry   = COMBINE_GEM_WELL_CHEMISTRIES.library_to_chemistry,
        libraries_to_translate = COMBINE_GEM_WELL_CHEMISTRIES.libraries_to_translate,
        is_antibody_only       = COMBINE_GEM_WELL_CHEMISTRIES.is_antibody_only,
        legacy                 = COMBINE_GEM_WELL_CHEMISTRIES.legacy,
    )
}

###############################################################################
# Gem well processor pipelines
###############################################################################
# Runs the gene-expression counting path for one gem well.
#
# PARSE_TARGET_FEATURES and MULTI_SETUP_CHUNKS depend only on pipeline inputs;
# _BASIC_SC_RNA_COUNTER consumes the outputs of both. In all three calls the
# inputs that are not bound explicitly are filled from the CounterInputs
# struct via `* = self.inputs`. rps_limit is bound to null in both calls that
# bind it.
# The three outputs expose the complete output structs of the three calls.
pipeline COUNT_GEM_WELL_PROCESSOR(
    in  int                   gem_group,
    in  string                sample_id,
    in  string                multi_config_sha,
    in  CounterInputs         inputs,
    in  DETECT_CHEMISTRY      chem,
    in  string[]              libraries_to_translate,
    in  bool                  is_pd,
    in  bool                  disable_multi,
    in  json                  multi_graph,
    out PARSE_TARGET_FEATURES target_outs,
    out MULTI_SETUP_CHUNKS    setup_chunks_outs,
    out _BASIC_SC_RNA_COUNTER basic_counter_outs,
)
{
    call PARSE_TARGET_FEATURES(
        rps_limit = null,
        *         = self.inputs,
    )

    # Uses the detected chemistry type; no default library type is given.
    call MULTI_SETUP_CHUNKS(
        sample_id            = self.sample_id,
        chemistry_name       = self.chem.chemistry_type,
        default_library_type = null,
        *                    = self.inputs,
    ) using (
        volatile = true,
    )

    # The pipeline's gem_group is passed on under the name gem_well.
    call _BASIC_SC_RNA_COUNTER(
        gem_well                  = self.gem_group,
        sample_id                 = self.sample_id,
        multi_config_sha          = self.multi_config_sha,
        is_pd                     = self.is_pd,
        chemistry_def             = MULTI_SETUP_CHUNKS.chemistry_def,
        barcode_whitelist         = MULTI_SETUP_CHUNKS.barcode_whitelist,
        is_antibody_only          = self.chem.is_antibody_only,
        libraries_to_translate    = self.libraries_to_translate,
        chunks                    = MULTI_SETUP_CHUNKS.chunks,
        target_panel_summary      = PARSE_TARGET_FEATURES.summary,
        disable_target_umi_filter = PARSE_TARGET_FEATURES.disable_target_umi_filter,
        rps_limit                 = null,
        target_features           = PARSE_TARGET_FEATURES.target_gene_indices,
        target_set                = PARSE_TARGET_FEATURES.target_panel,
        target_set_name           = PARSE_TARGET_FEATURES.target_set_name,
        disable_multi             = self.disable_multi,
        multi_graph               = self.multi_graph,
        *                         = self.inputs,
    )

    return (
        target_outs        = PARSE_TARGET_FEATURES,
        setup_chunks_outs  = MULTI_SETUP_CHUNKS,
        basic_counter_outs = _BASIC_SC_RNA_COUNTER,
    )
}

# Runs the V(D)J path for one gem well: chunk setup followed by contig
# assembly.
#
# MULTI_SETUP_CHUNKS is called with default_library_type "VDJ" and the given
# chemistry type; SC_VDJ_CONTIG_ASSEMBLER consumes its chemistry_def and
# chunks. In both calls the inputs that are not bound explicitly are filled
# from the VdjAssemblerInputs struct via `* = self.inputs`.
# The two outputs expose the complete output structs of the two calls.
pipeline VDJ_GEM_WELL_PROCESSOR(
    in  string                  sample_id,
    in  string                  chemistry_type,
    in  string                  receptor,
    in  VdjAssemblerInputs      inputs,
    in  VdjGenInputs            gen_inputs,
    in  csv                     gex_filtered_barcodes,
    in  bool                    is_antibody_only,
    in  bool                    is_non_targeted_gex,
    out MULTI_SETUP_CHUNKS      setup_chunks_outs,
    out SC_VDJ_CONTIG_ASSEMBLER assembler_outs,
)
{
    call MULTI_SETUP_CHUNKS(
        sample_id            = self.sample_id,
        chemistry_name       = self.chemistry_type,
        default_library_type = "VDJ",
        *                    = self.inputs,
    ) using (
        volatile = true,
    )

    # NOTE(review): gem_well is hard-coded to 1 here, whereas
    # COUNT_GEM_WELL_PROCESSOR forwards its gem_group input - confirm this is
    # intended once multiple gem wells are supported.
    call SC_VDJ_CONTIG_ASSEMBLER(
        gem_well              = 1,
        chemistry_def         = MULTI_SETUP_CHUNKS.chemistry_def,
        chunks                = MULTI_SETUP_CHUNKS.chunks,
        inner_primers         = self.inputs.inner_enrichment_primers,
        vdj_reference_folder  = self.gen_inputs.vdj_reference_path,
        receptor              = self.receptor,
        gex_filtered_barcodes = self.gex_filtered_barcodes,
        is_antibody_only      = self.is_antibody_only,
        is_non_targeted_gex   = self.is_non_targeted_gex,
        *                     = self.inputs,
    )

    return (
        setup_chunks_outs = MULTI_SETUP_CHUNKS,
        assembler_outs    = SC_VDJ_CONTIG_ASSEMBLER,
    )
}

# Stage declaration implemented in Python at
# "../rna/stages/multi/_force_sample_def_gem_well".
#
# Takes an array of sample definitions and a gem_group integer and emits an
# array of sample definitions under the same name, sample_def.
# NOTE(review): judging by the name, the stage presumably overwrites the gem
# group of every sample definition - confirm against the Python code.
# The stage is marked `volatile = strict`.
stage _FORCE_SAMPLE_DEF_GEM_WELL(
    in  map[] sample_def,
    in  int   gem_group,
    out map[] sample_def,
    src py    "../rna/stages/multi/_force_sample_def_gem_well",
) using (
    volatile = strict,
)

# Stage declaration for building the multi web summaries, implemented in
# Python at "../rna/stages/common/build_multi_web_summary".
#
# Takes two untyped maps (web_summary_data and metrics_summary_csvs) and emits
# a map of HTML files and a map of CSV files. metrics_summary_csvs is declared
# both as an input (map) and as an output (map<csv>).
stage BUILD_MULTI_WEB_SUMMARY(
    in  map       web_summary_data,
    in  map       metrics_summary_csvs,
    out map<html> web_summaries,
    out map<csv>  metrics_summary_csvs,
    src py        "../rna/stages/common/build_multi_web_summary",
)

# Stage declaration for rendering the multi graph, implemented in Python at
# "../rna/stages/multi/build_multi_graph_view".
#
# Takes the multi_graph JSON and emits an SVG file named `view`.
# The stage is marked `volatile = strict`.
stage BUILD_MULTI_GRAPH_VIEW(
    in  json multi_graph,
    out svg  view,
    src py   "../rna/stages/multi/build_multi_graph_view",
) using (
    volatile = strict,
)

# Returns a copy of count_inputs in which only sample_def is replaced.
#
# The stage _FORCE_SAMPLE_DEF_GEM_WELL is run on the original sample_def and
# the given gem_group; the returned CounterInputs struct takes sample_def from
# that stage and copies every other listed field from the input struct.
# NOTE(review): the fields are enumerated one by one, so this literal
# presumably has to be extended whenever CounterInputs gains a field.
pipeline FORCE_SAMPLE_DEF_GEM_WELL(
    in  CounterInputs count_inputs,
    in  int           gem_group,
    out CounterInputs count_inputs,
)
{
    call _FORCE_SAMPLE_DEF_GEM_WELL(
        sample_def = self.count_inputs.sample_def,
        gem_group  = self.gem_group,
    )

    return (
        count_inputs = {
            aligner:                     self.count_inputs.aligner,
            cell_calling_config:         self.count_inputs.cell_calling_config,
            custom_chemistry_def:        self.count_inputs.custom_chemistry_def,
            enforce_library_concordance: self.count_inputs.enforce_library_concordance,
            feature_reference:           self.count_inputs.feature_reference,
            force_sample_barcodes:       self.count_inputs.force_sample_barcodes,
            gene_index:                  self.count_inputs.gene_index,
            include_introns:             self.count_inputs.include_introns,
            initial_reads:               self.count_inputs.initial_reads,
            no_bam:                      self.count_inputs.no_bam,
            no_target_umi_filter:        self.count_inputs.no_target_umi_filter,
            primers:                     self.count_inputs.primers,
            r1_length:                   self.count_inputs.r1_length,
            r2_length:                   self.count_inputs.r2_length,
            reference_path:              self.count_inputs.reference_path,
            sample_def:                  _FORCE_SAMPLE_DEF_GEM_WELL.sample_def,
            subsample_rate:              self.count_inputs.subsample_rate,
            trim_polya_min_score:        self.count_inputs.trim_polya_min_score,
            trim_tso_min_score:          self.count_inputs.trim_tso_min_score,
        },
    )
}

# Processes one gem well: the gene-expression count path plus the T and B
# V(D)J paths.
#
# NOTE: This needs to be map-called if we have data from multiple
# gem wells
#
# Each of the three calls is switched by a flag in `config`: disable_count for
# COUNT_GEM_WELL_PROCESSOR, disable_vdj_t and disable_vdj_b for the two
# aliased VDJ_GEM_WELL_PROCESSOR calls. Both V(D)J calls read the filtered
# barcodes and the disable_targeted flag from COUNT_GEM_WELL_PROCESSOR's
# outputs, so they are ordered after it.
# The three outputs expose the complete output structs of the three calls.
pipeline MULTI_GEM_WELL_PROCESSOR(
    in  int                      gem_group,
    in  string                   sample_id,
    in  string                   multi_config_sha,
    in  FullPipelineConfig       config,
    in  bool                     is_pd,
    in  CounterInputs            count_inputs,
    in  DETECT_CHEMISTRY         count_chem,
    in  string[]                 libraries_to_translate,
    in  json                     multi_graph,
    in  VdjAssemblerInputs       vdj_t_inputs,
    in  string                   vdj_t_chem_type,
    in  string                   vdj_t_receptor,
    in  VdjAssemblerInputs       vdj_b_inputs,
    in  string                   vdj_b_chem_type,
    in  string                   vdj_b_receptor,
    in  VdjGenInputs             vdj_gen_inputs,
    out COUNT_GEM_WELL_PROCESSOR count,
    out VDJ_GEM_WELL_PROCESSOR   vdj_t,
    out VDJ_GEM_WELL_PROCESSOR   vdj_b,
)
{
    call COUNT_GEM_WELL_PROCESSOR(
        gem_group              = self.gem_group,
        sample_id              = self.sample_id,
        multi_config_sha       = self.multi_config_sha,
        inputs                 = self.count_inputs,
        chem                   = self.count_chem,
        libraries_to_translate = self.libraries_to_translate,
        is_pd                  = self.is_pd,
        disable_multi          = self.config.disable_multi,
        multi_graph            = self.multi_graph,
    ) using (
        disabled = self.config.disable_count,
    )

    # TODO: accept the raw matrix from COUNT_GEM_WELL_PROCESSOR
    # to aid in cell calling.
    call VDJ_GEM_WELL_PROCESSOR as VDJ_T_GEM_WELL_PROCESSOR(
        sample_id             = self.sample_id,
        chemistry_type        = self.vdj_t_chem_type,
        receptor              = self.vdj_t_receptor,
        inputs                = self.vdj_t_inputs,
        gen_inputs            = self.vdj_gen_inputs,
        gex_filtered_barcodes = COUNT_GEM_WELL_PROCESSOR.basic_counter_outs.filtered_barcodes,
        is_antibody_only      = self.count_chem.is_antibody_only,
        is_non_targeted_gex   = COUNT_GEM_WELL_PROCESSOR.target_outs.disable_targeted,
    ) using (
        disabled = self.config.disable_vdj_t,
    )

    # TODO: accept the raw matrix from COUNT_GEM_WELL_PROCESSOR
    # to aid in cell calling.
    call VDJ_GEM_WELL_PROCESSOR as VDJ_B_GEM_WELL_PROCESSOR(
        sample_id             = self.sample_id,
        chemistry_type        = self.vdj_b_chem_type,
        receptor              = self.vdj_b_receptor,
        inputs                = self.vdj_b_inputs,
        gen_inputs            = self.vdj_gen_inputs,
        gex_filtered_barcodes = COUNT_GEM_WELL_PROCESSOR.basic_counter_outs.filtered_barcodes,
        is_antibody_only      = self.count_chem.is_antibody_only,
        is_non_targeted_gex   = COUNT_GEM_WELL_PROCESSOR.target_outs.disable_targeted,
    ) using (
        disabled = self.config.disable_vdj_b,
    )

    # TODO: Add tag calling pipeline here

    return (
        count = COUNT_GEM_WELL_PROCESSOR,
        vdj_t = VDJ_T_GEM_WELL_PROCESSOR,
        vdj_b = VDJ_B_GEM_WELL_PROCESSOR,
    )
}

###############################################################################
# Count Analyzer
###############################################################################
# Runs the three secondary analyses on the count results: SC_RNA_ANALYZER,
# _CRISPR_ANALYZER and _TARGETED_ANALYZER.  Each call is gated by its own
# disable_* input, and the full output set of every call is returned as a
# struct-valued output (rna_analyzer, crispr_analyzer, targeted_analyzer).
pipeline COUNT_ANALYZER(
    in  h5                    filtered_matrices_h5,
    in  h5                    molecule_info,
    in  CounterInputs         count_inputs,
    in  bool                  no_secondary_analysis,
    in  bool                  disable_rna,
    in  bool                  disable_crispr,
    in  bool                  disable_targeted,
    in  h5                    filtered_feature_counts_matrix,
    in  csv                   filtered_barcodes,
    in  csv                   feature_reference,
    in  json                  counter_metrics_json,
    in  PARSE_TARGET_FEATURES parse_target_features,
    out SC_RNA_ANALYZER       rna_analyzer,
    out _CRISPR_ANALYZER      crispr_analyzer,
    out _TARGETED_ANALYZER    targeted_analyzer,
)
{
    # TODO: It would be cleaner if _CRISPR_ANALYZER can go inside SC_RNA_ANALYZER
    # TODO: This pipeline can be cleaned up using structs for various sets of parameters

    # Only the filtered matrix and the no_secondary_analysis flag are taken
    # from this pipeline's inputs; every tuning parameter below is bound to
    # null (presumably so the analyzer falls back to its own defaults --
    # confirm against SC_RNA_ANALYZER).
    call SC_RNA_ANALYZER(
        filtered_matrices_h5       = self.filtered_matrices_h5,
        no_secondary_analysis      = self.no_secondary_analysis,
        use_genes                  = null,
        skip_multigenome_analysis  = false,
        chemistry_batch_correction = false,
        aggr_library_info          = null,
        num_analysis_bcs           = null,
        num_pca_bcs                = null,
        num_pca_genes              = null,
        num_principal_comps        = null,
        cbc_knn                    = null,
        cbc_alpha                  = null,
        cbc_sigma                  = null,
        cbc_realign_panorama       = null,
        max_clusters               = null,
        graphclust_neighbors       = null,
        neighbor_a                 = null,
        neighbor_b                 = null,
        tsne_perplexity            = null,
        tsne_input_pcs             = null,
        random_seed                = null,
        tsne_theta                 = null,
        exclude_genes              = null,
        use_bcs                    = null,
        tsne_max_dims              = null,
        tsne_max_iter              = null,
        tsne_stop_lying_iter       = null,
        tsne_mom_switch_iter       = null,
        umap_n_neighbors           = null,
        umap_input_pcs             = null,
        umap_max_dims              = null,
        umap_min_dist              = null,
        umap_metric                = null,
        # NOTE: this is null because the cells are already forced in FILTER_BARCODES
        force_cells                = null,
    ) using (
        disabled = self.disable_rna,
    )

    # Note: this call reads filtered_feature_counts_matrix, a separate input
    # from the filtered_matrices_h5 used by the other two analyzers.
    call _CRISPR_ANALYZER(
        filtered_feature_counts_matrix = self.filtered_feature_counts_matrix,
        filtered_barcodes    = self.filtered_barcodes,
        feature_reference    = self.feature_reference,
        counter_metrics_json = self.counter_metrics_json,
    ) using (
        disabled = self.disable_crispr,
    )

    call _TARGETED_ANALYZER(
        molecule_info             = self.molecule_info,
        filtered_gene_bc_matrices = self.filtered_matrices_h5,
        filtered_barcodes         = self.filtered_barcodes,
        basic_counter_summary     = self.counter_metrics_json,
        targeted_summary          = self.parse_target_features.summary,
        # Wildcard binding: inputs of _TARGETED_ANALYZER not bound above are
        # taken from the same-named fields of count_inputs.
        *                         = self.count_inputs,
    ) using (
        disabled = self.disable_targeted,
    )

    return (
        rna_analyzer      = SC_RNA_ANALYZER,
        crispr_analyzer   = _CRISPR_ANALYZER,
        targeted_analyzer = _TARGETED_ANALYZER,
    )
}

###############################################################################
# Reporter pipeline
###############################################################################
# Builds the VdjReport for one receptor chain type from the outputs of a
# VDJ_GEM_WELL_PROCESSOR run (vdj_gw) and a SC_VDJ_CLONOTYPE_ASSIGNER run
# (vdj_clonotype): contig files, metric summaries, the contig-info proto,
# the vloupe input and the web summary.
pipeline VDJ_REPORTER(
    in  string                    sample_id,
    in  string                    sample_desc,
    in  string                    multi_config_sha,
    in  string                    receptor,
    in  path                      vdj_reference_path,
    in  bool                      has_no_vdj_ref,
    in  VDJ_GEM_WELL_PROCESSOR    vdj_gw,
    in  SC_VDJ_CLONOTYPE_ASSIGNER vdj_clonotype,
    out VdjReport                 report,
)
{
    # Produces the contig FASTA/FASTQ/BED files and the cell barcode lists
    # that the later calls and the final report consume.
    call WRITE_CONTIG_OUTS(
        contig_annotations       = self.vdj_clonotype.all_contig_annotations_json,
        total_read_pairs         = self.vdj_gw.assembler_outs.total_read_pairs,
        corrected_bc_counts      = self.vdj_gw.assembler_outs.corrected_barcode_counts,
        assemblable_reads_per_bc = self.vdj_gw.assembler_outs.assemblable_reads_per_bc,
    )

    # Only REPORT_CONTIGS.summary is consumed below; the call is marked
    # volatile at the call site.
    call REPORT_CONTIGS(
        vdj_reference_path = self.vdj_reference_path,
        cell_barcodes      = WRITE_CONTIG_OUTS.cell_barcodes,
        contigs            = WRITE_CONTIG_OUTS.contig_fasta,
        annotations        = self.vdj_clonotype.all_contig_annotations_json,
        filter_summary     = null,
        contig_summary     = self.vdj_gw.assembler_outs.summary_tsv,
        umi_summary        = self.vdj_gw.assembler_outs.umi_summary_tsv,
    ) using (
        volatile = true,
    )

    # Combines the assembler, contig-report, clonotype and contig-writer
    # summaries into the metrics summaries and web summary.
    call SUMMARIZE_VDJ_REPORTS(
        sample_id         = self.sample_id,
        sample_desc       = self.sample_desc,
        barcode_whitelist = self.vdj_gw.setup_chunks_outs.barcode_whitelist,
        barcode_support   = self.vdj_gw.assembler_outs.barcode_support,
        summaries         = [
            self.vdj_gw.assembler_outs.summary,
            REPORT_CONTIGS.summary,
            self.vdj_clonotype.summary,
            WRITE_CONTIG_OUTS.summary,
        ],
        cell_barcodes     = WRITE_CONTIG_OUTS.cell_barcodes,
        clonotype_summary = self.vdj_clonotype.clonotypes_csv,
        receptor          = self.receptor,
    )

    # Skipped when there is no VDJ reference (has_no_vdj_ref); in that case
    # report.vdj_contig_info below has no producer (presumably null --
    # confirm downstream consumers tolerate that).
    call WRITE_CONTIG_PROTO(
        vdj_reference_path      = self.vdj_reference_path,
        contig_annotations_json = self.vdj_clonotype.all_contig_annotations_json,
        metrics_summary_json    = SUMMARIZE_VDJ_REPORTS.metrics_summary_json,
        receptor                = self.receptor,
        # TODO: NEEDS TO BE UPDATED FOR MULTI GEM WELL
        gem_wells               = [1],
        cell_barcodes           = WRITE_CONTIG_OUTS.cell_barcodes,
        sample_id               = self.sample_id,
        sample_desc             = self.sample_desc,
        multi_config_sha        = self.multi_config_sha,
    ) using (
        disabled = self.has_no_vdj_ref,
    )

    # disable_vloupe is forwarded as a regular input rather than as a
    # `disabled` call modifier.
    call VLOUPE_PREPROCESS(
        pipestance_type = "SC_VDJ_ASSEMBLER_CS",
        sample_id       = self.sample_id,
        sample_desc     = self.sample_desc,
        enclone_output  = self.vdj_clonotype.enclone_output,
        disable_vloupe  = self.vdj_clonotype.disable_vloupe,
    )

    # Note the rename: report.productive_cell_barcodes is populated from
    # WRITE_CONTIG_OUTS.paired_cell_barcodes.
    return (
        report = {
            annotations_bed:          WRITE_CONTIG_OUTS.annotations_bed,
            cell_barcodes:            WRITE_CONTIG_OUTS.cell_barcodes,
            contig_fasta:             WRITE_CONTIG_OUTS.contig_fasta,
            contig_fasta_fai:         WRITE_CONTIG_OUTS.contig_fasta_fai,
            contig_fastq:             WRITE_CONTIG_OUTS.contig_fastq,
            filtered_contig_fasta:    WRITE_CONTIG_OUTS.filtered_contig_fasta,
            filtered_contig_fastq:    WRITE_CONTIG_OUTS.filtered_contig_fastq,
            metrics_summary_csv:      SUMMARIZE_VDJ_REPORTS.metrics_summary_csv,
            metrics_summary_json:     SUMMARIZE_VDJ_REPORTS.metrics_summary_json,
            productive_cell_barcodes: WRITE_CONTIG_OUTS.paired_cell_barcodes,
            receptor:                 SUMMARIZE_VDJ_REPORTS.receptor,
            vdj_contig_info:          WRITE_CONTIG_PROTO.vdj_contig_info,
            vloupe:                   VLOUPE_PREPROCESS.output_for_vloupe,
            web_summary:              SUMMARIZE_VDJ_REPORTS.web_summary,
            web_summary_data:         SUMMARIZE_VDJ_REPORTS.web_summary_data,
        },
    )
}

# Python stage that takes the library-level cloupe file and the map of
# per-sample cloupe files and emits a single cloupe output.  The selection
# rule lives in the stage code (../rna/stages/multi/choose_cloupe) and is
# not visible from this declaration.
stage CHOOSE_CLOUPE(
    in  cloupe      library_cloupe,
    in  map<cloupe> sample_cloupe,
    out cloupe      cloupe,
    src py          "../rna/stages/multi/choose_cloupe",
) using (
    volatile = strict,
)

# Library-level reporting for a multi run.  Summarizes the count results,
# prepares and selects the cloupe file, builds one VdjReport per receptor
# (T and B) and generates the library-level plot JSONs.  The count-side
# calls are gated by config.disable_count / disable_library_cloupe /
# config.disable_multi_count, the VDJ calls by config.disable_vdj_t and
# config.disable_vdj_b.
pipeline MULTI_REPORTER(
    in  string                    sample_id,
    in  string                    sample_desc,
    in  string                    multi_config_sha,
    in  FullPipelineConfig        config,
    in  string                    count_pipestance_type,
    in  csv                       feature_reference,
    in  path                      reference_path,
    in  COUNT_GEM_WELL_PROCESSOR  count_gw,
    in  bool                      include_introns,
    in  SC_RNA_ANALYZER           count_analyzer,
    in  _CRISPR_ANALYZER          crispr_analyzer,
    in  _TARGETED_ANALYZER        targeted_analyzer,
    in  path                      vdj_reference_path,
    in  string                    vdj_t_receptor,
    in  VDJ_GEM_WELL_PROCESSOR    vdj_t_gw,
    in  SC_VDJ_CLONOTYPE_ASSIGNER vdj_t_clonotype,
    in  string                    vdj_b_receptor,
    in  VDJ_GEM_WELL_PROCESSOR    vdj_b_gw,
    in  SC_VDJ_CLONOTYPE_ASSIGNER vdj_b_clonotype,
    in  h5                        barcode_summary,
    in  csv                       filtered_barcodes,
    in  AssignTagsOuts            assign_tags_outs,
    in  bool                      disable_library_cloupe,
    in  map<cloupe>               sample_cloupe,
    out SUMMARIZE_REPORTS         count_summary,
    out cloupe                    cloupe,
    out VdjReport                 vdj_t_report,
    out VdjReport                 vdj_b_report,
    out map<json>                 barcode_rank_plots,
    out json                      jibes_biplot_histogram,
    out json                      targeted_plot,
    out json                      cmo_tsne_plot,
)
{
    # Merges the counter, analyzer, CRISPR and targeting summaries.  The
    # barcode summary and filtered barcodes used here come from count_gw,
    # not from this pipeline's barcode_summary / filtered_barcodes inputs.
    call SUMMARIZE_REPORTS(
        summaries                    = [
            self.count_gw.basic_counter_outs.summary,
            self.count_analyzer.summary,
            self.crispr_analyzer.crispr_analysis_metrics,
            self.targeted_analyzer.targeted_analysis_metrics,
            self.count_gw.target_outs.summary,
        ],
        sample_id                    = self.sample_id,
        sample_desc                  = self.sample_desc,
        reference_path               = self.reference_path,
        analysis                     = self.count_analyzer.analysis,
        barcode_summary_h5           = self.count_gw.basic_counter_outs.barcode_summary,
        filtered_gene_bc_matrices_h5 = self.count_gw.basic_counter_outs.filtered_gene_bc_matrices_h5,
        filtered_barcodes            = self.count_gw.basic_counter_outs.filtered_barcodes,
        barcode_whitelist            = self.count_gw.setup_chunks_outs.barcode_whitelist,
        gem_groups                   = self.count_gw.basic_counter_outs.gem_groups,
        feature_reference            = self.feature_reference,
        target_set_name              = self.count_gw.target_outs.target_set_name,
        per_feature_metrics_csv      = self.targeted_analyzer.per_feature_metrics_csv,
        include_introns              = self.include_introns,
    ) using (
        disabled = self.config.disable_count,
    )

    # Library-level cloupe input.  The aggregation- and image-related
    # parameters are all bound to null here.
    call CLOUPE_PREPROCESS(
        pipestance_type              = self.count_pipestance_type,
        sample_id                    = self.sample_id,
        sample_desc                  = self.sample_desc,
        analysis                     = self.count_analyzer.analysis,
        filtered_gene_bc_matrices_h5 = self.count_gw.basic_counter_outs.filtered_gene_bc_matrices_h5,
        metrics_json                 = SUMMARIZE_REPORTS.metrics_summary_json,
        aggregation_csv              = null,
        gem_group_index_json         = null,
        tissue_image_paths           = null,
        dark_images                  = null,
        tissue_positions_list        = null,
        fiducial_positions_list      = null,
        dzi_info                     = null,
        dzi_tiles_paths              = null,
        scale_factors_json           = null,
        no_secondary_analysis        = false,
        barcode_whitelist            = null,
        loupe_map                    = null,
        product_type                 = "sc",
        cells_per_tag                = self.assign_tags_outs.cells_per_tag,
        cells_per_protospacer        = self.crispr_analyzer.cells_per_protospacer,
    ) using (
        disabled = self.disable_library_cloupe,
    )

    # Produces the single cloupe output from the library-level file above
    # and the per-sample files.  The library-level call may have been
    # disabled via disable_library_cloupe.
    call CHOOSE_CLOUPE(
        library_cloupe = CLOUPE_PREPROCESS.output_for_cloupe,
        sample_cloupe  = self.sample_cloupe,
    ) using (
        disabled = self.config.disable_count,
    )

    # The T and B reporter calls are identical apart from the receptor,
    # gem-well and clonotype inputs and the disable flag.
    call VDJ_REPORTER as VDJ_T_REPORTER(
        sample_id          = self.sample_id,
        sample_desc        = self.sample_desc,
        receptor           = self.vdj_t_receptor,
        multi_config_sha   = self.multi_config_sha,
        vdj_reference_path = self.vdj_reference_path,
        has_no_vdj_ref     = self.config.has_no_vdj_ref,
        vdj_gw             = self.vdj_t_gw,
        vdj_clonotype      = self.vdj_t_clonotype,
    ) using (
        disabled = self.config.disable_vdj_t,
    )

    call VDJ_REPORTER as VDJ_B_REPORTER(
        sample_id          = self.sample_id,
        sample_desc        = self.sample_desc,
        receptor           = self.vdj_b_receptor,
        multi_config_sha   = self.multi_config_sha,
        vdj_reference_path = self.vdj_reference_path,
        has_no_vdj_ref     = self.config.has_no_vdj_ref,
        vdj_gw             = self.vdj_b_gw,
        vdj_clonotype      = self.vdj_b_clonotype,
    ) using (
        disabled = self.config.disable_vdj_b,
    )

    # The call itself is gated by config.disable_multi_count; disable_count
    # is additionally passed as a regular input so the stage can handle the
    # VDJ-only case itself.
    call GENERATE_LIBRARY_PLOTS(
        # The stage needs to be disabled for vdj only multi
        disable_count                = self.config.disable_count,
        # pared-down data needed for barcode rank plot
        barcode_summary_h5           = self.barcode_summary,
        filtered_barcodes            = self.filtered_barcodes,
        reference_path               = self.reference_path,
        # for jibes biplot/cmo tsne
        tag_assigner_pickle          = self.assign_tags_outs.tag_assigner_pickle,
        cells_per_tag                = self.assign_tags_outs.cells_per_tag,
        non_tag_assignments          = self.assign_tags_outs.non_tag_assignments,
        analysis                     = self.count_analyzer.analysis,
        # hard to decouple targeted table/plot from whole legacy websummary code
        # TODO need to remove these dependencies later
        metrics_summary              = SUMMARIZE_REPORTS.metrics_summary_json,
        sample_id                    = self.sample_id,
        sample_desc                  = self.sample_desc,
        filtered_gene_bc_matrices_h5 = self.count_gw.basic_counter_outs.filtered_gene_bc_matrices_h5,
        barcode_whitelist            = self.count_gw.setup_chunks_outs.barcode_whitelist,
        gem_groups                   = self.count_gw.basic_counter_outs.gem_groups,
        feature_reference            = self.feature_reference,
        target_set_name              = self.count_gw.target_outs.target_set_name,
        per_feature_metrics_csv      = self.targeted_analyzer.per_feature_metrics_csv,
        include_introns              = self.include_introns,
    ) using (
        disabled = self.config.disable_multi_count,
    )

    return (
        count_summary          = SUMMARIZE_REPORTS,
        cloupe                 = CHOOSE_CLOUPE.cloupe,
        vdj_t_report           = VDJ_T_REPORTER.report,
        vdj_b_report           = VDJ_B_REPORTER.report,
        barcode_rank_plots     = GENERATE_LIBRARY_PLOTS.library_to_barcode_rank,
        jibes_biplot_histogram = GENERATE_LIBRARY_PLOTS.jibes_biplot_histogram,
        targeted_plot          = GENERATE_LIBRARY_PLOTS.targeted_plot,
        cmo_tsne_plot          = GENERATE_LIBRARY_PLOTS.cmo_tsne_plot,
    )
}

# Python stage that emits the library-level plot JSONs: a map of barcode
# rank plots, the jibes biplot/histogram, the targeted plot and the CMO
# t-SNE plot.  Inputs are grouped below by the plot that needs them.
# disable_count is a regular input (not a call modifier); how the stage
# reacts to it is defined in the stage code, not in this declaration.
stage GENERATE_LIBRARY_PLOTS(
    in  bool      disable_count,
    # for barcode rank
    in  h5        barcode_summary_h5,
    in  csv       filtered_barcodes,
    in  path      reference_path,
    # for jibes biplot
    in  pickle    tag_assigner_pickle,
    # for cmo TSNE plot
    in  path      analysis,
    in  json      cells_per_tag,
    in  json      non_tag_assignments,
    # needed for legacy WS, code for targeted plot/table tricky to decouple from it
    # TODO(Peter Edge) excise that functionality and remove these dependencies
    in  json      metrics_summary,
    in  string    sample_id,
    in  string    sample_desc,
    in  h5        filtered_gene_bc_matrices_h5,
    in  string    barcode_whitelist,
    in  int[]     gem_groups,
    in  csv       feature_reference,
    in  string    target_set_name,
    in  csv       per_feature_metrics_csv,
    in  bool      include_introns,
    out map<json> library_to_barcode_rank,
    out json      jibes_biplot_histogram,
    out json      targeted_plot,
    out json      cmo_tsne_plot,
    src py        "../rna/stages/multi/generate_library_plots",
) using (
    mem_gb   = 5,
    volatile = strict,
)

# Python stage that turns a secondary-analysis directory into the per-sample
# t-SNE plot JSON.  is_antibody_only is forwarded to the stage code; its
# effect is not visible from this declaration.
stage GENERATE_SAMPLE_TSNE_PLOTS(
    in  path analysis,
    in  bool is_antibody_only,
    out json sample_tsne_plots,
    src py   "../rna/stages/multi/generate_sample_tsne_plots",
) using (
    mem_gb   = 8,
    volatile = strict,
)

# Per-sample reporting: computes the sample metrics summary, the sample
# t-SNE plots and the sample cloupe file.  Only the CLOUPE_PREPROCESS call
# is gated (by config.disable_count); the other two calls have no
# `disabled` modifier.  The two VDJ report outputs are not wired up yet and
# are always returned as null.
pipeline SAMPLE_REPORTER(
    in  SampleSlfeOuts     sample_outs,
    in  string             sample_id,
    in  string             sample_desc,
    in  FullPipelineConfig config,
    in  string             count_pipestance_type,
    in  SC_RNA_ANALYZER    count_analyzer,
    in  _CRISPR_ANALYZER   crispr_analyzer,
    in  _TARGETED_ANALYZER targeted_analyzer,
    in  path               reference_path,
    # NOTE(review): declared as h5 here, whereas GENERATE_LIBRARY_PLOTS
    # declares its barcode_whitelist input as string -- confirm which type
    # _SAMPLE_CELLS_REPORTER expects.
    in  h5                 barcode_whitelist,
    in  h5                 barcode_summary,
    in  CellCalling        cell_calling_config,
    in  json               sample_assignment_metrics,
    in  json               target_features_metrics,
    in  json               cells_per_tag,
    in  bool               is_antibody_only,
    out json               metrics_summary,
    out cloupe             cloupe,
    out json               sample_tsne_plots,
    out VdjReport          vdj_t_report,
    out VdjReport          vdj_b_report,
)
{
    # Combines the per-sample counter outputs with the analyzer metrics;
    # its summary is both the metrics_summary output and the metrics input
    # of CLOUPE_PREPROCESS below.
    call _SAMPLE_CELLS_REPORTER(
        molecule_info             = self.sample_outs.molecule_info,
        barcode_whitelist         = self.barcode_whitelist,
        reference_path            = self.reference_path,
        recovered_cells           = self.cell_calling_config.recovered_cells,
        matrices_h5               = self.sample_outs.all_genes_matrix_h5,
        matrix_computer_summary   = self.sample_outs.metrics_summary,
        filtered_barcodes         = self.sample_outs.filtered_barcodes,
        per_barcode_metrics       = self.sample_outs.per_barcode_metrics,
        barcode_summary           = self.barcode_summary,
        sample_assignment_metrics = self.sample_assignment_metrics,
        count_analyzer_metrics    = self.count_analyzer.summary,
        targeted_analyzer_metrics = self.targeted_analyzer.targeted_analysis_metrics,
        crispr_analyzer_metrics   = self.crispr_analyzer.crispr_analysis_metrics,
        target_features_metrics   = self.target_features_metrics,
    )

    call GENERATE_SAMPLE_TSNE_PLOTS(
        analysis         = self.count_analyzer.analysis,
        is_antibody_only = self.is_antibody_only,
    )

    # Uses sample_outs.matrix_h5, whereas _SAMPLE_CELLS_REPORTER above uses
    # sample_outs.all_genes_matrix_h5 (NOTE(review): presumably intentional
    # -- confirm against the SampleSlfeOuts definition).
    call CLOUPE_PREPROCESS(
        pipestance_type              = self.count_pipestance_type,
        sample_id                    = self.sample_id,
        sample_desc                  = self.sample_desc,
        analysis                     = self.count_analyzer.analysis,
        filtered_gene_bc_matrices_h5 = self.sample_outs.matrix_h5,
        metrics_json                 = _SAMPLE_CELLS_REPORTER.summary,
        aggregation_csv              = null,
        gem_group_index_json         = null,
        tissue_image_paths           = null,
        dark_images                  = null,
        tissue_positions_list        = null,
        fiducial_positions_list      = null,
        dzi_info                     = null,
        dzi_tiles_paths              = null,
        scale_factors_json           = null,
        no_secondary_analysis        = false,
        barcode_whitelist            = null,
        loupe_map                    = null,
        product_type                 = "sc",
        cells_per_tag                = self.cells_per_tag,
        cells_per_protospacer        = self.crispr_analyzer.cells_per_protospacer,
    ) using (
        disabled = self.config.disable_count,
    )

    # TODO(Peter / Sreenath): wire up per-sample VDJ reporting here

    return (
        metrics_summary   = _SAMPLE_CELLS_REPORTER.summary,
        cloupe            = CLOUPE_PREPROCESS.output_for_cloupe,
        sample_tsne_plots = GENERATE_SAMPLE_TSNE_PLOTS.sample_tsne_plots,
        vdj_t_report      = null,
        vdj_b_report      = null,
    )
}

# Python stage that converts a list of gem groups and a list of molecule
# info files into the inputs the aggregation pipeline takes: an array of
# sample-def maps and an aggregation CSV.  (Presumably the two input arrays
# are parallel, one entry per GEM well -- confirm in the stage code.)
stage MAKE_MULTI_GEM_RNA_AGGR_SAMPLE_DEFS(
    in  int[] gem_groups,
    in  h5[]  molecule_info,
    out map[] sample_defs,
    out csv   aggr_csv,
    src py    "../rna/stages/multi/make_multi_gem_rna_aggr_sample_defs",
) using (
    volatile = strict,
)

# Temporary hack: SC_RNA_AGGREGATOR does not take the molecule info files as
# inputs directly, only a CSV that names them.  This pipeline contains no
# calls; it simply returns both of its inputs unchanged, so that a binding to
# it references the aggregated matrix and the molecule info files together.
pipeline DEPEND_ON_MOLECULE_INFO_H5S(
    in  h5   _aggred_matrix,
    in  h5[] _dependent_h5s,
    out h5   _aggred_matrix,
    out h5[] _dependent_h5s,
)
{
    return (
        _aggred_matrix = self._aggred_matrix,
        _dependent_h5s = self._dependent_h5s,
    )
}

# Merges the count outputs of several GEM wells into one set of outputs.
# The outputs of this pipeline are designed to match the outputs of
# _basic_sc_rna_counter as closely as possible, excluding the outputs that
# are only necessary for merging.
#
# The BAM, metrics and annotation outputs come from the merged gem-well
# files; the molecule info and the raw/filtered matrices come from
# SC_RNA_AGGREGATOR.  The CHECK_INVARIANTS, DEPEND_ON_MOLECULE_INFO_H5S and
# CLOUPE_PREPROCESS calls do not feed any of the returned outputs.
pipeline MERGE_GEM_WELLS_AND_SLICE_CELLS(
    in  int[]                      gem_groups,
    in  CountInputs                count_input,
    in  COUNT_GEM_WELL_PROCESSOR[] gem_well_processor_count,
    out csv                        filtered_barcodes,
    out csv                        barcode_correction_csv,
    out bam                        possorted_genome_bam,
    out SampleBamFile[]            multi_pos_sorted_bam,
    out bam.bai                    possorted_genome_bam_index,
    out json                       summary,
    out h5                         molecule_info,
    out h5                         raw_gene_bc_matrices_h5,
    out path                       raw_gene_bc_matrices_mex,
    out h5                         filtered_gene_bc_matrices_h5,
    out path                       filtered_gene_bc_matrices_mex,
    out int[]                      gem_groups,
    out ann.bincode.lz4[]          annotation_files,
)
{
    # gem_well_processor outputs has all the input fields of WRITE_POS_BAM, with the proper names
    # this stage takes those and merges them
    call MERGE_GEM_WELL_FILES(
        unmerged_gem_well_files = self.gem_well_processor_count.basic_counter_outs,
    )

    # Source of the `summary` output.  No sample barcodes are passed here,
    # unlike WRITE_POS_BAM below.
    call COLLATE_METRICS(
        per_barcode_metrics  = MERGE_GEM_WELL_FILES.merged_gem_well_files.per_barcode_metrics_shard,
        reference_path       = self.count_input.reference_path,
        feature_reference    = MERGE_GEM_WELL_FILES.merged_gem_well_files.slfe_feature_reference,
        sample_barcodes_json = null,
    )

    # create monolithic multi-GEM BAM file
    call WRITE_POS_BAM(
        alignments           = MERGE_GEM_WELL_FILES.merged_gem_well_files.alignments,
        read_chunks          = MERGE_GEM_WELL_FILES.merged_gem_well_files.read_chunks,
        bam_header           = MERGE_GEM_WELL_FILES.merged_gem_well_files.bam_header,
        target_set_name      = MERGE_GEM_WELL_FILES.merged_gem_well_files.target_set_name,
        sample_barcodes_json = self.count_input.force_sample_barcodes,
        no_bam               = false,
    )

    # create multi-GEM filtered barcodes CSV and barcode correction CSV
    call MERGE_GEM_WELL_CSVS(
        filtered_barcodes      = self.gem_well_processor_count.basic_counter_outs.filtered_barcodes,
        barcode_correction_csv = self.gem_well_processor_count.basic_counter_outs.barcode_correction_csv,
    )

    # Build the aggregator's sample defs and aggregation CSV from the
    # per-GEM-well molecule info files.
    call MAKE_MULTI_GEM_RNA_AGGR_SAMPLE_DEFS(
        gem_groups    = self.gem_groups,
        molecule_info = self.gem_well_processor_count.basic_counter_outs.molecule_info,
    )

    # The aggregator and CHECK_INVARIANTS consume the updated sample defs
    # returned by this call, not the raw ones produced above.
    call CHECK_MOLECULE_INFO_VERSION(
        sample_defs  = MAKE_MULTI_GEM_RNA_AGGR_SAMPLE_DEFS.sample_defs,
        product_type = "sc",
    )

    # sample_id and sample_desc are hard-coded placeholders; the
    # commented-out alternative for both is self.count_input.sample_id.
    call SC_RNA_AGGREGATOR(
        # self.count_input.sample_id,
        sample_id             = "sample",
        # self.count_input.sample_id,
        sample_desc           = "sample",
        sample_defs           = CHECK_MOLECULE_INFO_VERSION.updated_sample_defs,
        normalization_mode    = "mapped",
        no_secondary_analysis = self.count_input.no_secondary_analysis,
        num_analysis_bcs      = null,
        num_pca_bcs           = null,
        num_pca_genes         = null,
        num_principal_comps   = null,
        cbc_knn               = null,
        cbc_alpha             = null,
        cbc_sigma             = null,
        cbc_realign_panorama  = null,
        max_clusters          = null,
        graphclust_neighbors  = null,
        neighbor_a            = null,
        neighbor_b            = null,
        tsne_perplexity       = null,
        tsne_input_pcs        = null,
        tsne_theta            = null,
        random_seed           = null,
        tsne_max_dims         = null,
        tsne_max_iter         = null,
        tsne_stop_lying_iter  = null,
        tsne_mom_switch_iter  = null,
        product_type          = "sc",
    )

    # Takes the aggregator's input sample defs and its merged raw matrix
    # (presumably a consistency check between the two -- see the stage
    # code).  None of its outputs are referenced in this pipeline.
    call CHECK_INVARIANTS(
        input_sample_defs = CHECK_MOLECULE_INFO_VERSION.updated_sample_defs,
        merged_raw_gene_bc_matrices_h5 = SC_RNA_AGGREGATOR.raw_gene_bc_matrices_h5,
    )

    # Pass-through call that references the aggregated matrix together with
    # the per-GEM-well molecule info files.
    call DEPEND_ON_MOLECULE_INFO_H5S(
        _aggred_matrix = SC_RNA_AGGREGATOR.raw_gene_bc_matrices_h5,
        _dependent_h5s = self.gem_well_processor_count.basic_counter_outs.molecule_info,
    )

    # Same placeholder sample_id / sample_desc as the aggregator call above.
    # The result of this call is not part of the returned outputs.
    call CLOUPE_PREPROCESS(
        pipestance_type              = "SC_RNA_AGGREGATOR_CS",
        # self.count_input.sample_id,
        sample_id                    = "sample",
        # self.count_input.sample_id,
        sample_desc                  = "sample",
        analysis                     = SC_RNA_AGGREGATOR.analysis,
        filtered_gene_bc_matrices_h5 = SC_RNA_AGGREGATOR.filtered_gene_bc_matrices_h5,
        metrics_json                 = SC_RNA_AGGREGATOR.summary,
        aggregation_csv              = MAKE_MULTI_GEM_RNA_AGGR_SAMPLE_DEFS.aggr_csv,
        gem_group_index_json         = SC_RNA_AGGREGATOR.gem_group_index_json,
        tissue_image_paths           = null,
        dark_images                  = null,
        tissue_positions_list        = null,
        fiducial_positions_list      = null,
        dzi_info                     = null,
        dzi_tiles_paths              = null,
        scale_factors_json           = null,
        no_secondary_analysis        = self.count_input.no_secondary_analysis,
        barcode_whitelist            = null,
        loupe_map                    = null,
        product_type                 = "sc",
        cells_per_tag                = null,
        cells_per_protospacer        = null,
    )

    # certain metrics, such as those from make_shard and barcode correction,
    # are not easily merged over GEM wells and are therefore excluded.
    # those metrics could be added here and merged into the output metrics
    # call MERGE_METRICS(
    #     summaries = [
    #         MAKE_SHARD.summary,                             # TODO
    #         BARCODE_CORRECTION.summary,                     # TODO
    #         _SLFE_PARTIAL_FIRST_PASS.umi_filtering_summary, # TODO
    #         COLLATE_METRICS.summary,
    #     ],
    # )

    return (
        filtered_barcodes             = MERGE_GEM_WELL_CSVS.filtered_barcodes,
        barcode_correction_csv        = MERGE_GEM_WELL_CSVS.barcode_correction_csv,
        possorted_genome_bam          = WRITE_POS_BAM.pos_sorted_bam.bam_file,
        possorted_genome_bam_index    = WRITE_POS_BAM.pos_sorted_bam.bam_index_file,
        # SUMMARIZE_BASIC_REPORTS.summary,  # no report
        summary                       = COLLATE_METRICS.summary,
        molecule_info                 = SC_RNA_AGGREGATOR.molecule_info,
        raw_gene_bc_matrices_h5       = SC_RNA_AGGREGATOR.raw_gene_bc_matrices_h5,
        raw_gene_bc_matrices_mex      = SC_RNA_AGGREGATOR.raw_gene_bc_matrices_mex,
        filtered_gene_bc_matrices_h5  = SC_RNA_AGGREGATOR.filtered_gene_bc_matrices_h5,
        filtered_gene_bc_matrices_mex = SC_RNA_AGGREGATOR.filtered_gene_bc_matrices_mex,
        gem_groups                    = self.gem_groups,
        annotation_files              = MERGE_GEM_WELL_FILES.merged_gem_well_files.annotation_files,
        # sliced outputs
        multi_pos_sorted_bam          = WRITE_POS_BAM.multi_pos_sorted_bam,
    )
}

# TODO: Should we add the rest of the return values of _basic_sc_rna_counter here
# for consistency, even if they are not needed?
# Or create a struct that represents the outputs shared by _basic_sc_rna_counter and this pipeline?

# Python stage that bundles the per-sample BAM files, metrics, molecule
# infos and matrices (together with the multi graph, feature reference and
# target panel) into a single map<SampleSlfeOuts>, and additionally emits
# the BAM and BAM index of the unassigned alignments.
#
# The stage-level `volatile` resource only accepts the value `strict`
# (boolean values are for the call-level `volatile` modifier), so this stage
# uses `volatile = strict` like every other stage in this file.
stage STRUCTIFY_PER_SAMPLE_OUTS(
    in  SampleBamFile[]      sample_bams,
    in  SampleMetrics[]      sample_metrics,
    in  SampleMoleculeInfo[] sample_molecule_infos,
    in  SampleMatrices[]     sample_matrices,
    in  json                 multi_graph,
    in  csv                  feature_reference,
    in  csv                  target_panel,
    out map<SampleSlfeOuts>  sample_outs,
    out bam                  unassigned_alignments,
    out bam.bai              unassigned_alignments_index,
    src py                   "../rna/stages/multi/structify_per_sample_outs",
) using (
    volatile = strict,
)

# Python stage that takes six typed maps (each input is prefixed `in_`) and
# re-emits them under the unprefixed names.  The analysis, cloupe and
# metrics-summary maps keep their element types; the metrics summary is
# additionally emitted as an untyped `map`, and the t-SNE and barcode rank
# plot maps are emitted only as untyped `map`.  What "sanitizing" does to
# the entries is defined in the stage code, not in this declaration.
stage SANITIZE_MAP_CALLS(
    in  map<path>   in_crispr_analysis,
    in  map<path>   in_rna_analysis,
    in  map<cloupe> in_cloupe_file,
    in  map<json>   in_metrics_summary,
    in  map<json>   in_sample_tsne_plots,
    in  map<json>   in_barcode_rank_plots,
    out map<path>   crispr_analysis,
    out map<path>   rna_analysis,
    out map<cloupe> cloupe_file,
    out map<json>   metrics_summary,
    out map         metrics_summary_untyped,
    out map         sample_tsne_plots,
    out map         barcode_rank_plots,
    src py          "../rna/stages/multi/sanitize_map_calls",
) using (
    volatile = strict,
)

#
# @include "sc_multi_core.mro"
#

# Builds the multi web summaries and metrics summary CSVs.
#
# Flow:
#   1. BUILD_VDJ_WS_CONTENTS is called twice (aliased for T and B), each call
#      disabled by its own `disable_vdj_*` input.
#   2. BUILD_MULTI_GRAPH_VIEW produces a view of `multi_graph`.
#   3. WRITE_MULTI_WEB_SUMMARY_JSON receives the count-side metrics/plots, the
#      graph view and both VDJ content outputs.
#   4. BUILD_MULTI_WEB_SUMMARY consumes the JSON and CSV outputs of step 3.
#
# Outputs:
#   multi_web_summary_json - the complete output struct of
#                            WRITE_MULTI_WEB_SUMMARY_JSON (not a single file).
#   multi_web_summaries    - map<html> from BUILD_MULTI_WEB_SUMMARY.
#   metrics_summary_csvs   - map<csv> from BUILD_MULTI_WEB_SUMMARY.
pipeline MULTI_WEBSUMMARY_BUILDER(
    in  VdjGenInputs                 vdj_gen_inputs,
    in  json                         vdj_t_metrics_summary,
    in  json                         vdj_t_ws_json,
    in  string                       vdj_t_receptor,
    in  VdjInputs                    vdj_t_inputs,
    in  smf.json                     vdj_t_sequencing_metrics,
    in  bool                         disable_vdj_t,
    in  json                         vdj_b_metrics_summary,
    in  json                         vdj_b_ws_json,
    in  string                       vdj_b_receptor,
    in  VdjInputs                    vdj_b_inputs,
    in  bool                         disable_vdj_b,
    in  smf.json                     vdj_b_sequencing_metrics,
    in  map                          per_sample_metrics,
    in  json                         library_metrics,
    in  smf.json                     sequencing_metrics,
    in  csv                          multi_config,
    in  json                         multi_graph,
    in  CommonInputs                 common_inputs,
    in  CountInputs                  count_inputs,
    in  json                         tag_contaminant_info,
    in  map                          sample_tsne_plots,
    in  map                          barcode_rank_plots,
    in  json                         jibes_biplot_histogram,
    in  json                         targeted_plot,
    in  json                         cmo_tsne_plot,
    in  string                       target_set_name,
    out WRITE_MULTI_WEB_SUMMARY_JSON multi_web_summary_json,
    out map<html>                    multi_web_summaries,
    out map<csv>                     metrics_summary_csvs,
)
{
    # VDJ-T web summary contents; skipped when disable_vdj_t is set
    call BUILD_VDJ_WS_CONTENTS as BUILD_VDJ_T_WS_CONTENTS(
        vdj_gen_inputs     = self.vdj_gen_inputs,
        metrics_summary    = self.vdj_t_metrics_summary,
        receptor           = self.vdj_t_receptor,
        vdj_inputs         = self.vdj_t_inputs,
        sequencing_metrics = self.vdj_t_sequencing_metrics,
        vdj_ws_json        = self.vdj_t_ws_json,
    ) using (
        disabled = self.disable_vdj_t,
    )

    # VDJ-B web summary contents; skipped when disable_vdj_b is set
    call BUILD_VDJ_WS_CONTENTS as BUILD_VDJ_B_WS_CONTENTS(
        vdj_gen_inputs     = self.vdj_gen_inputs,
        metrics_summary    = self.vdj_b_metrics_summary,
        receptor           = self.vdj_b_receptor,
        vdj_inputs         = self.vdj_b_inputs,
        sequencing_metrics = self.vdj_b_sequencing_metrics,
        vdj_ws_json        = self.vdj_b_ws_json,
    ) using (
        disabled = self.disable_vdj_b,
    )

    # the resulting `view` is passed on below as `multi_graph_svg`
    call BUILD_MULTI_GRAPH_VIEW(
        multi_graph = self.multi_graph,
    )

    # combines the count-side inputs with the graph view and both VDJ contents
    call WRITE_MULTI_WEB_SUMMARY_JSON(
        per_sample_metrics     = self.per_sample_metrics,
        library_metrics        = self.library_metrics,
        multi_config           = self.multi_config,
        multi_graph            = self.multi_graph,
        multi_graph_svg        = BUILD_MULTI_GRAPH_VIEW.view,
        common_inputs          = self.common_inputs,
        count_inputs           = self.count_inputs,
        sequencing_metrics     = self.sequencing_metrics,
        tag_contaminant_info   = self.tag_contaminant_info,
        sample_tsne_plots      = self.sample_tsne_plots,
        barcode_rank_plots     = self.barcode_rank_plots,
        jibes_biplot_histogram = self.jibes_biplot_histogram,
        targeted_plot          = self.targeted_plot,
        cmo_tsne_plot          = self.cmo_tsne_plot,
        target_set_name        = self.target_set_name,
        vdj_t_contents         = BUILD_VDJ_T_WS_CONTENTS.vdj_ws_contents,
        vdj_b_contents         = BUILD_VDJ_B_WS_CONTENTS.vdj_ws_contents,
    )

    # the metrics CSVs are routed through this stage and returned from it below
    call BUILD_MULTI_WEB_SUMMARY(
        web_summary_data     = WRITE_MULTI_WEB_SUMMARY_JSON.web_summary_json,
        metrics_summary_csvs = WRITE_MULTI_WEB_SUMMARY_JSON.metrics_summary_csv,
    )

    return (
        multi_web_summary_json = WRITE_MULTI_WEB_SUMMARY_JSON,
        multi_web_summaries    = BUILD_MULTI_WEB_SUMMARY.web_summaries,
        metrics_summary_csvs   = BUILD_MULTI_WEB_SUMMARY.metrics_summary_csvs,
    )
}

# Core of the multi pipeline: wires chemistry detection, GEM well processing,
# VDJ clonotype assignment, per-sample and library-level count analysis,
# reporting and web summary generation.
#
# Exactly one GEM well is processed: the chemistry detector receives a
# one-element list of GEM well inputs and MULTI_GEM_WELL_PROCESSOR is called
# once with gem_group = 1.
#
# Most outputs expose the complete output struct of a sub-pipeline/stage
# (e.g. `multi_gw`, `multi_reporter`, `count_analyzer`); `sample_analyzer` and
# `sample_reporter` are maps because those calls are map calls over the
# per-sample outputs.
pipeline SC_MULTI_CORE(
    in  CommonInputs                 common_input,
    in  CountInputs                  count_input,
    in  VdjInputs[]                  vdj_inputs,
    in  VdjGenInputs                 vdj_gen_inputs,
    in  BasicPipelineConfig          basic_config,
    in  csv                          multi_config,
    in  json                         multi_graph,
    in  bool                         is_pd,
    # We would ultimately want to consolidate or get rid of the
    # remaining inputs.
    in  string[]                     count_allowed_chems,
    in  string                       count_pipestance_type,
    out FullPipelineConfig           full_config,
    out SPLIT_VDJ_INPUTS             split_vdj,
    out MULTI_GEM_WELL_PROCESSOR     multi_gw,
    out SC_VDJ_CLONOTYPE_ASSIGNER    vdj_t_clonotype,
    out SC_VDJ_CLONOTYPE_ASSIGNER    vdj_b_clonotype,
    out MULTI_REPORTER               multi_reporter,
    out COUNT_ANALYZER               count_analyzer,
    out DETECT_CHEMISTRY             detect_count_chem,
    out DISABLE_FEATURE_STAGES       disable_feat,
    out VdjRefFolder                 vdj_ref_out,
    out map<COUNT_ANALYZER>          sample_analyzer,
    out map<SAMPLE_REPORTER>         sample_reporter,
    out map<SampleSlfeOuts>          sample_outs,
    out bam                          unassigned_bam,
    out bam.bai                      unassigned_bai,
    out WRITE_MULTI_WEB_SUMMARY_JSON multi_web_summary_json,
    out map<html>                    multi_web_summaries,
    out map<csv>                     multi_metrics_csvs,
)
{
    # map-calls a helper pipeline over GEM wells to perform DETECT_CHEMISTRY on
    # the GEX and VDJ libraries for each GEM well
    # checks for compatibility between GEM wells and returns a combined result
    call MULTI_CHEMISTRY_DETECTOR(
        multi_gem_well_inputs = [
            {
                common_inputs: self.common_input,
                count_inputs:  self.count_input,
                vdj_inputs:    self.vdj_inputs,
            },
        ],
        vdj_gen_inputs        = self.vdj_gen_inputs,
        basic_config          = self.basic_config,
        count_allowed_chems   = self.count_allowed_chems,
    )

    # separates the VDJ inputs into the vdj_t_* / vdj_b_* values used below;
    # skipped when VDJ is disabled in the basic config
    call SPLIT_VDJ_INPUTS(
        vdj_inputs          = self.vdj_inputs,
        # for the time being, use legacy outputs
        vdj_chemistry_types = MULTI_CHEMISTRY_DETECTOR.legacy.detect_vdj_chem.chemistry_type,
        # for the time being, use legacy outputs
        vdj_receptors       = MULTI_CHEMISTRY_DETECTOR.legacy.detect_vdj_chem.receptor,
    ) using (
        disabled = self.basic_config.disable_vdj,
    )

    # produces `config`, whose disable_* fields gate most of the calls below
    call MAKE_FULL_CONFIG(
        vdj_t_input        = SPLIT_VDJ_INPUTS.vdj_t_input,
        vdj_b_input        = SPLIT_VDJ_INPUTS.vdj_b_input,
        basic_config       = self.basic_config,
        vdj_reference_path = self.vdj_gen_inputs.vdj_reference_path,
    )

    # single GEM well: gem_group is hard-coded to 1 (see the note on
    # map-calling below)
    call MULTI_GEM_WELL_PROCESSOR(
        gem_group              = 1,
        multi_config_sha       = self.common_input.multi_config_sha,
        sample_id              = self.common_input.sample_id,
        count_inputs           = self.count_input,
        # for the time being, use legacy outputs
        count_chem             = MULTI_CHEMISTRY_DETECTOR.legacy.detect_count_chem,
        libraries_to_translate = MULTI_CHEMISTRY_DETECTOR.libraries_to_translate,
        vdj_t_chem_type        = SPLIT_VDJ_INPUTS.vdj_t_chemistry_type,
        vdj_t_receptor         = SPLIT_VDJ_INPUTS.vdj_t_receptor,
        vdj_t_inputs           = SPLIT_VDJ_INPUTS.vdj_t_input,
        vdj_b_chem_type        = SPLIT_VDJ_INPUTS.vdj_b_chemistry_type,
        vdj_b_receptor         = SPLIT_VDJ_INPUTS.vdj_b_receptor,
        vdj_b_inputs           = SPLIT_VDJ_INPUTS.vdj_b_input,
        vdj_gen_inputs         = self.vdj_gen_inputs,
        is_pd                  = self.is_pd,
        config                 = MAKE_FULL_CONFIG.config,
        multi_graph            = self.multi_graph,
    )

    # eventually GEM_WELL_PROCESSOR will be map-called and this stage will merge those results
    #call MERGE_GEM_WELLS_AND_SLICE_CELLS(
    #    gem_groups               = [1],
    #    count_input              = self.count_input,
    #    gem_well_processor_count = [MULTI_GEM_WELL_PROCESSOR.count],
    #) using (
    #    disabled = MAKE_FULL_CONFIG.config.disable_multi,
    #)

    # NOTE: This needs to be map-called for each sample when multiplexed
    call SC_VDJ_CLONOTYPE_ASSIGNER as VDJ_T_CLONOTYPE_ASSIGNER(
        vdj_reference_path = self.vdj_gen_inputs.vdj_reference_path,
        contig_annotations = MULTI_GEM_WELL_PROCESSOR.vdj_t.assembler_outs.contig_annotations,
        receptor           = SPLIT_VDJ_INPUTS.vdj_t_receptor,
        has_no_vdj_ref     = MAKE_FULL_CONFIG.config.has_no_vdj_ref,
    ) using (
        disabled = MAKE_FULL_CONFIG.config.disable_vdj_t,
    )

    # NOTE: This needs to be map-called for each sample when multiplexed
    call SC_VDJ_CLONOTYPE_ASSIGNER as VDJ_B_CLONOTYPE_ASSIGNER(
        vdj_reference_path = self.vdj_gen_inputs.vdj_reference_path,
        contig_annotations = MULTI_GEM_WELL_PROCESSOR.vdj_b.assembler_outs.contig_annotations,
        receptor           = SPLIT_VDJ_INPUTS.vdj_b_receptor,
        has_no_vdj_ref     = MAKE_FULL_CONFIG.config.has_no_vdj_ref,
    ) using (
        disabled = MAKE_FULL_CONFIG.config.disable_vdj_b,
    )

    # gathers the counter's multi_* outputs into `sample_outs`, the map that
    # the per-sample map calls below split over; it also provides the
    # unassigned alignments returned by this pipeline
    call STRUCTIFY_PER_SAMPLE_OUTS(
        sample_bams           = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.multi_pos_sorted_bam,
        sample_metrics        = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.multi_metrics,
        sample_molecule_infos = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.multi_molecule_info,
        sample_matrices       = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.multi_matrices,
        multi_graph           = self.multi_graph,
        feature_reference     = self.count_input.feature_reference,
        target_panel          = MULTI_GEM_WELL_PROCESSOR.count.target_outs.target_panel,
    )

    # TODO: Can `disable_crispr` be part of the `BasicPipelineConfig`?
    call DISABLE_FEATURE_STAGES(
        sample_def          = self.count_input.sample_def,
        disable_multi       = MAKE_FULL_CONFIG.config.disable_multi,
        disable_count       = MAKE_FULL_CONFIG.config.disable_count,
        in_disable_targeted = MULTI_GEM_WELL_PROCESSOR.count.target_outs.disable_targeted,
        is_pd               = self.is_pd,
        sample_outs         = STRUCTIFY_PER_SAMPLE_OUTS.sample_outs,
        multi_graph         = self.multi_graph,
    )

    # per-sample map call of the count analyzer for multi runs
    # NOTE(review): filtered_matrices_h5 and filtered_feature_counts_matrix are
    # bound to the same per-sample matrix_h5 -- confirm both inputs are intended.
    map call COUNT_ANALYZER as SAMPLE_ANALYZER(
        filtered_matrices_h5  = split STRUCTIFY_PER_SAMPLE_OUTS.sample_outs.matrix_h5,
        molecule_info         = split STRUCTIFY_PER_SAMPLE_OUTS.sample_outs.molecule_info,
        count_inputs          = self.count_input,
        filtered_feature_counts_matrix = split STRUCTIFY_PER_SAMPLE_OUTS.sample_outs.matrix_h5,
        filtered_barcodes     = split STRUCTIFY_PER_SAMPLE_OUTS.sample_outs.filtered_barcodes,
        counter_metrics_json  = split STRUCTIFY_PER_SAMPLE_OUTS.sample_outs.metrics_summary,
        disable_rna           = false,
        disable_crispr        = DISABLE_FEATURE_STAGES.disable_crispr,
        disable_targeted      = DISABLE_FEATURE_STAGES.disable_targeted,
        feature_reference     = self.count_input.feature_reference,
        no_secondary_analysis = self.count_input.no_secondary_analysis,
        parse_target_features = MULTI_GEM_WELL_PROCESSOR.count.target_outs,
    ) using (
        disabled = MAKE_FULL_CONFIG.config.disable_multi_count,
    )

    # library-level count analyzer
    # library-level rna analyzer is not run for legacy count runs
    # NOTE(review): as in the per-sample call, both matrix inputs receive the
    # same filtered_gene_bc_matrices_h5 value.
    call COUNT_ANALYZER(
        filtered_matrices_h5  = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.filtered_gene_bc_matrices_h5,
        molecule_info         = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.molecule_info,
        count_inputs          = self.count_input,
        filtered_feature_counts_matrix = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.filtered_gene_bc_matrices_h5,
        filtered_barcodes     = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.filtered_barcodes,
        counter_metrics_json  = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.summary,
        disable_rna           = DISABLE_FEATURE_STAGES.disable_library_cloupe,
        disable_crispr        = DISABLE_FEATURE_STAGES.disable_crispr,
        disable_targeted      = DISABLE_FEATURE_STAGES.disable_targeted,
        feature_reference     = self.count_input.feature_reference,
        no_secondary_analysis = self.count_input.no_secondary_analysis,
        parse_target_features = MULTI_GEM_WELL_PROCESSOR.count.target_outs,
    ) using (
        disabled = MAKE_FULL_CONFIG.config.disable_count,
    )

    # per-sample map-called run of reporter, for multi runs
    map call SAMPLE_REPORTER(
        sample_outs               = split STRUCTIFY_PER_SAMPLE_OUTS.sample_outs,
        count_analyzer            = split SAMPLE_ANALYZER.rna_analyzer,
        crispr_analyzer           = split SAMPLE_ANALYZER.crispr_analyzer,
        targeted_analyzer         = split SAMPLE_ANALYZER.targeted_analyzer,
        sample_assignment_metrics = split MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.sample_assignment_metrics,
        target_features_metrics   = MULTI_GEM_WELL_PROCESSOR.count.target_outs.summary,
        sample_id                 = self.common_input.sample_id,
        sample_desc               = self.common_input.sample_desc,
        config                    = MAKE_FULL_CONFIG.config,
        count_pipestance_type     = self.count_pipestance_type,
        barcode_whitelist         = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.barcode_whitelist,
        cell_calling_config       = self.count_input.cell_calling_config,
        barcode_summary           = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.barcode_summary,
        reference_path            = self.count_input.reference_path,
        cells_per_tag             = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.assign_tags.cells_per_tag,
        is_antibody_only          = MULTI_CHEMISTRY_DETECTOR.is_antibody_only,
    ) using (
        disabled = MAKE_FULL_CONFIG.config.disable_multi_count,
    )

    # reporter for library-level information
    call MULTI_REPORTER(
        sample_id              = self.common_input.sample_id,
        sample_desc            = self.common_input.sample_desc,
        multi_config_sha       = self.common_input.multi_config_sha,
        config                 = MAKE_FULL_CONFIG.config,
        count_pipestance_type  = self.count_pipestance_type,
        feature_reference      = self.count_input.feature_reference,
        reference_path         = self.count_input.reference_path,
        count_gw               = MULTI_GEM_WELL_PROCESSOR.count,
        include_introns        = self.count_input.include_introns,
        count_analyzer         = COUNT_ANALYZER.rna_analyzer,
        crispr_analyzer        = COUNT_ANALYZER.crispr_analyzer,
        targeted_analyzer      = COUNT_ANALYZER.targeted_analyzer,
        vdj_reference_path     = self.vdj_gen_inputs.vdj_reference_path,
        vdj_t_receptor         = SPLIT_VDJ_INPUTS.vdj_t_receptor,
        vdj_t_gw               = MULTI_GEM_WELL_PROCESSOR.vdj_t,
        vdj_t_clonotype        = VDJ_T_CLONOTYPE_ASSIGNER,
        vdj_b_receptor         = SPLIT_VDJ_INPUTS.vdj_b_receptor,
        vdj_b_gw               = MULTI_GEM_WELL_PROCESSOR.vdj_b,
        vdj_b_clonotype        = VDJ_B_CLONOTYPE_ASSIGNER,
        filtered_barcodes      = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.filtered_barcodes,
        barcode_summary        = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.barcode_summary,
        assign_tags_outs       = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.assign_tags,
        disable_library_cloupe = DISABLE_FEATURE_STAGES.disable_library_cloupe,
        sample_cloupe          = SAMPLE_REPORTER.cloupe,
    )

    # provides the `vdj_ref_out` output; skipped when VDJ is disabled in the
    # basic config
    call COPY_VDJ_REFERENCE(
        vdj_reference_path = self.vdj_gen_inputs.vdj_reference_path,
        vdj_t_donor_ref_fa = VDJ_T_CLONOTYPE_ASSIGNER.donor_ref_fa,
        vdj_b_donor_ref_fa = VDJ_B_CLONOTYPE_ASSIGNER.donor_ref_fa,
    ) using (
        disabled = self.basic_config.disable_vdj,
    )

    # only the metrics and plot maps are needed by the web summary builder
    # below, so the analysis and cloupe inputs are passed as null here
    call SANITIZE_MAP_CALLS(
        in_crispr_analysis    = null,
        in_rna_analysis       = null,
        in_cloupe_file        = null,
        in_metrics_summary    = SAMPLE_REPORTER.metrics_summary,
        in_sample_tsne_plots  = SAMPLE_REPORTER.sample_tsne_plots,
        in_barcode_rank_plots = MULTI_REPORTER.barcode_rank_plots,
    ) using (
        disabled = MAKE_FULL_CONFIG.config.disable_multi,
    )

    # takes the per-sample maps from SANITIZE_MAP_CALLS (untyped variants) and
    # the library-level values from MULTI_REPORTER / MULTI_GEM_WELL_PROCESSOR
    call MULTI_WEBSUMMARY_BUILDER(
        vdj_gen_inputs           = self.vdj_gen_inputs,
        vdj_t_metrics_summary    = MULTI_REPORTER.vdj_t_report.metrics_summary_json,
        vdj_t_ws_json            = MULTI_REPORTER.vdj_t_report.web_summary_data,
        vdj_t_receptor           = SPLIT_VDJ_INPUTS.vdj_t_receptor,
        vdj_t_inputs             = SPLIT_VDJ_INPUTS.vdj_t_input,
        vdj_t_sequencing_metrics = MULTI_GEM_WELL_PROCESSOR.vdj_t.assembler_outs.sequencing_metrics,
        disable_vdj_t            = MAKE_FULL_CONFIG.config.disable_vdj_t,
        vdj_b_metrics_summary    = MULTI_REPORTER.vdj_b_report.metrics_summary_json,
        vdj_b_ws_json            = MULTI_REPORTER.vdj_b_report.web_summary_data,
        vdj_b_receptor           = SPLIT_VDJ_INPUTS.vdj_b_receptor,
        vdj_b_inputs             = SPLIT_VDJ_INPUTS.vdj_b_input,
        vdj_b_sequencing_metrics = MULTI_GEM_WELL_PROCESSOR.vdj_b.assembler_outs.sequencing_metrics,
        disable_vdj_b            = MAKE_FULL_CONFIG.config.disable_vdj_b,
        per_sample_metrics       = SANITIZE_MAP_CALLS.metrics_summary_untyped,
        library_metrics          = MULTI_REPORTER.count_summary.metrics_summary_json,
        multi_config             = self.multi_config,
        multi_graph              = self.multi_graph,
        common_inputs            = self.common_input,
        count_inputs             = self.count_input,
        sequencing_metrics       = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.sequencing_metrics,
        tag_contaminant_info     = MULTI_GEM_WELL_PROCESSOR.count.basic_counter_outs.assign_tags.tag_contaminant_info,
        sample_tsne_plots        = SANITIZE_MAP_CALLS.sample_tsne_plots,
        barcode_rank_plots       = SANITIZE_MAP_CALLS.barcode_rank_plots,
        jibes_biplot_histogram   = MULTI_REPORTER.jibes_biplot_histogram,
        targeted_plot            = MULTI_REPORTER.targeted_plot,
        cmo_tsne_plot            = MULTI_REPORTER.cmo_tsne_plot,
        target_set_name          = MULTI_GEM_WELL_PROCESSOR.count.target_outs.target_set_name,
    ) using (
        disabled = MAKE_FULL_CONFIG.config.disable_multi,
    )

    return (
        full_config            = MAKE_FULL_CONFIG.config,
        split_vdj              = SPLIT_VDJ_INPUTS,
        multi_gw               = MULTI_GEM_WELL_PROCESSOR,
        vdj_t_clonotype        = VDJ_T_CLONOTYPE_ASSIGNER,
        vdj_b_clonotype        = VDJ_B_CLONOTYPE_ASSIGNER,
        multi_reporter         = MULTI_REPORTER,
        count_analyzer         = COUNT_ANALYZER,
        detect_count_chem      = MULTI_CHEMISTRY_DETECTOR.legacy.detect_count_chem,
        disable_feat           = DISABLE_FEATURE_STAGES,
        vdj_ref_out            = COPY_VDJ_REFERENCE.vdj_reference,
        sample_analyzer        = SAMPLE_ANALYZER,
        sample_reporter        = SAMPLE_REPORTER,
        sample_outs            = STRUCTIFY_PER_SAMPLE_OUTS.sample_outs,
        unassigned_bam         = STRUCTIFY_PER_SAMPLE_OUTS.unassigned_alignments,
        unassigned_bai         = STRUCTIFY_PER_SAMPLE_OUTS.unassigned_alignments_index,
        multi_web_summary_json = MULTI_WEBSUMMARY_BUILDER.multi_web_summary_json,
        multi_web_summaries    = MULTI_WEBSUMMARY_BUILDER.multi_web_summaries,
        multi_metrics_csvs     = MULTI_WEBSUMMARY_BUILDER.metrics_summary_csvs,
    )
}

# Repackages the outputs of an SC_MULTI_CORE run into one VdjOutputsCS struct
# plus one web summary for each of the VDJ-T and VDJ-B arms.
#
# This pipeline makes no calls; it only selects fields. For each arm the
# fields are drawn from three places inside `multi_core`:
#   - vdj_{t,b}_clonotype                    (clonotype assigner outputs)
#   - multi_reporter.vdj_{t,b}_report        (contig reporter outputs)
#   - multi_gw.vdj_{t,b}.assembler_outs      (contig BAM and its index)
# The T and B struct literals are field-for-field parallel; keep them in sync
# when adding or removing a field.
pipeline BUILD_VDJ_OUTPUTS_CS(
    in  SC_MULTI_CORE multi_core,
    out VdjOutputsCS  vdj_t_outs_cs,
    out html          vdj_t_web_summary,
    out VdjOutputsCS  vdj_b_outs_cs,
    out html          vdj_b_web_summary,
)
{
    return (
        vdj_t_outs_cs     = {
            airr_rearrangement:              self.multi_core.vdj_t_clonotype.airr_rearrangement,
            all_contig_annotations_bed:      self.multi_core.multi_reporter.vdj_t_report.annotations_bed,
            all_contig_annotations_csv:      self.multi_core.vdj_t_clonotype.all_contig_annotations_csv,
            all_contig_annotations_json:     self.multi_core.vdj_t_clonotype.all_contig_annotations_json,
            all_contig_bam:                  self.multi_core.multi_gw.vdj_t.assembler_outs.contig_bam,
            all_contig_bam_bai:              self.multi_core.multi_gw.vdj_t.assembler_outs.contig_bam_bai,
            all_contig_fasta:                self.multi_core.multi_reporter.vdj_t_report.contig_fasta,
            all_contig_fasta_fai:            self.multi_core.multi_reporter.vdj_t_report.contig_fasta_fai,
            all_contig_fastq:                self.multi_core.multi_reporter.vdj_t_report.contig_fastq,
            cell_barcodes:                   self.multi_core.multi_reporter.vdj_t_report.cell_barcodes,
            clonotypes:                      self.multi_core.vdj_t_clonotype.clonotypes_csv,
            concat_ref_bam:                  self.multi_core.vdj_t_clonotype.concat_ref_bam,
            concat_ref_bam_bai:              self.multi_core.vdj_t_clonotype.concat_ref_bam_bai,
            concat_ref_fasta:                self.multi_core.vdj_t_clonotype.concat_ref_fasta,
            concat_ref_fasta_fai:            self.multi_core.vdj_t_clonotype.concat_ref_fasta_fai,
            consensus_annotations_csv:       self.multi_core.vdj_t_clonotype.consensus_annotations_csv,
            consensus_bam:                   self.multi_core.vdj_t_clonotype.consensus_bam,
            consensus_bam_bai:               self.multi_core.vdj_t_clonotype.consensus_bam_bai,
            consensus_fasta:                 self.multi_core.vdj_t_clonotype.consensus_fasta,
            consensus_fasta_fai:             self.multi_core.vdj_t_clonotype.consensus_fasta_fai,
            filtered_contig_annotations_csv: self.multi_core.vdj_t_clonotype.filtered_contig_annotations_csv,
            filtered_contig_fasta:           self.multi_core.multi_reporter.vdj_t_report.filtered_contig_fasta,
            filtered_contig_fastq:           self.multi_core.multi_reporter.vdj_t_report.filtered_contig_fastq,
            metrics_summary_csv:             self.multi_core.multi_reporter.vdj_t_report.metrics_summary_csv,
            vdj_contig_info:                 self.multi_core.multi_reporter.vdj_t_report.vdj_contig_info,
            vloupe:                          self.multi_core.multi_reporter.vdj_t_report.vloupe,
        },
        vdj_t_web_summary = self.multi_core.multi_reporter.vdj_t_report.web_summary,
        vdj_b_outs_cs     = {
            airr_rearrangement:              self.multi_core.vdj_b_clonotype.airr_rearrangement,
            all_contig_annotations_bed:      self.multi_core.multi_reporter.vdj_b_report.annotations_bed,
            all_contig_annotations_csv:      self.multi_core.vdj_b_clonotype.all_contig_annotations_csv,
            all_contig_annotations_json:     self.multi_core.vdj_b_clonotype.all_contig_annotations_json,
            all_contig_bam:                  self.multi_core.multi_gw.vdj_b.assembler_outs.contig_bam,
            all_contig_bam_bai:              self.multi_core.multi_gw.vdj_b.assembler_outs.contig_bam_bai,
            all_contig_fasta:                self.multi_core.multi_reporter.vdj_b_report.contig_fasta,
            all_contig_fasta_fai:            self.multi_core.multi_reporter.vdj_b_report.contig_fasta_fai,
            all_contig_fastq:                self.multi_core.multi_reporter.vdj_b_report.contig_fastq,
            cell_barcodes:                   self.multi_core.multi_reporter.vdj_b_report.cell_barcodes,
            clonotypes:                      self.multi_core.vdj_b_clonotype.clonotypes_csv,
            concat_ref_bam:                  self.multi_core.vdj_b_clonotype.concat_ref_bam,
            concat_ref_bam_bai:              self.multi_core.vdj_b_clonotype.concat_ref_bam_bai,
            concat_ref_fasta:                self.multi_core.vdj_b_clonotype.concat_ref_fasta,
            concat_ref_fasta_fai:            self.multi_core.vdj_b_clonotype.concat_ref_fasta_fai,
            consensus_annotations_csv:       self.multi_core.vdj_b_clonotype.consensus_annotations_csv,
            consensus_bam:                   self.multi_core.vdj_b_clonotype.consensus_bam,
            consensus_bam_bai:               self.multi_core.vdj_b_clonotype.consensus_bam_bai,
            consensus_fasta:                 self.multi_core.vdj_b_clonotype.consensus_fasta,
            consensus_fasta_fai:             self.multi_core.vdj_b_clonotype.consensus_fasta_fai,
            filtered_contig_annotations_csv: self.multi_core.vdj_b_clonotype.filtered_contig_annotations_csv,
            filtered_contig_fasta:           self.multi_core.multi_reporter.vdj_b_report.filtered_contig_fasta,
            filtered_contig_fastq:           self.multi_core.multi_reporter.vdj_b_report.filtered_contig_fastq,
            metrics_summary_csv:             self.multi_core.multi_reporter.vdj_b_report.metrics_summary_csv,
            vdj_contig_info:                 self.multi_core.multi_reporter.vdj_b_report.vdj_contig_info,
            vloupe:                          self.multi_core.multi_reporter.vdj_b_report.vloupe,
        },
        vdj_b_web_summary = self.multi_core.multi_reporter.vdj_b_report.web_summary,
    )
}

# Assembles a single SampleOutputsCS struct from the pieces produced for one
# sample. This pipeline makes no calls; it only arranges its inputs.
#
# The `count` sub-struct combines the analysis directories and cloupe file
# passed in directly with the per-sample files taken from `sample_slfe_outs`.
# `antibody_analysis` is always set to null here.
# The VDJ structs, metrics summary CSV and web summary are passed through
# unchanged.
pipeline BUILD_SAMPLE_OUTS(
    in  SampleSlfeOuts  sample_slfe_outs,
    in  path            rna_analysis,
    in  path            crispr_analysis,
    in  cloupe          cloupe,
    in  html            web_summary,
    in  csv             metrics_summary_csv,
    in  VdjOutputsCS    vdj_b_outs,
    in  VdjOutputsCS    vdj_t_outs,
    out SampleOutputsCS sample_outs,
)
{
    return (
        sample_outs = {
            count: {
                analysis:                     self.rna_analysis,
                antibody_analysis:            null,
                cloupe:                       self.cloupe,
                crispr_analysis:              self.crispr_analysis,
                feature_reference_csv:        self.sample_slfe_outs.feature_reference,
                sample_alignments:            self.sample_slfe_outs.bam_file,
                sample_alignments_index:      self.sample_slfe_outs.bam_index_file,
                sample_barcodes_csv:          self.sample_slfe_outs.filtered_barcodes,
                sample_feature_bc_matrix:     self.sample_slfe_outs.matrix_h5,
                sample_feature_bc_matrix_mex: self.sample_slfe_outs.matrix_mex,
                sample_molecule_info:         self.sample_slfe_outs.molecule_info,
                target_panel:                 self.sample_slfe_outs.target_panel,
            },
            metrics_summary: self.metrics_summary_csv,
            vdj_b:           self.vdj_b_outs,
            vdj_t:           self.vdj_t_outs,
            web_summary:     self.web_summary,
        },
    )
}

#
# @include "sc_multi_cs.mro"
#

# Expands a VdjInputsCS struct into the full set of VDJ input values by adding
# fixed defaults. This pipeline makes no calls.
#
# Set explicitly here:
#   chemistry = "SCVDJ_auto"
#   primers   = the fixed list below (P5, P7, R1, R2, polyA, rt_primer, spacer)
#   custom_chemistry_def, ground_truth_clonotype_path, initial_reads,
#   primer_initial_reads, special_genomic_regions, subsample_rate = null
#
# Every output not listed above (sample_def, force_cells, denovo, r1_length,
# r2_length, inner_enrichment_primers, chain_type, physical_library_id) is
# supplied by the `* = self.cs_inputs` wildcard binding, i.e. taken from the
# same-named field of `cs_inputs`.
pipeline FULL_VDJ_INPUTS(
    in  VdjInputsCS  cs_inputs,
    out map[]        sample_def,
    out string       chemistry,
    out ChemistryDef custom_chemistry_def,
    out map[]        primers,
    out int          force_cells,
    out float        subsample_rate,
    out int          initial_reads,
    out int          primer_initial_reads,
    out string[]     special_genomic_regions,
    out bool         denovo,
    out int          r1_length,
    out int          r2_length,
    out path         ground_truth_clonotype_path,
    out path         inner_enrichment_primers,
    out string       chain_type,
    out string       physical_library_id,
)
{
    return (
        chemistry                   = "SCVDJ_auto",
        custom_chemistry_def        = null,
        ground_truth_clonotype_path = null,
        initial_reads               = null,
        primer_initial_reads        = null,
        primers                     = [
            {
                "name": "P5",
                "seq": "AATGATACGGCGACCACCGAGATCT",
            },
            {
                "name": "P7",
                "seq": "CAAGCAGAAGACGGCATACGAGAT",
            },
            {
                "name": "R1",
                "seq": "ACACTCTTTCCCTACACGACGCTCTTCCGATCT",
            },
            {
                "name": "R2",
                "seq": "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT",
            },
            {
                "name": "polyA",
                "seq": "AAAAAAAAAAAAAAAAAAAA",
            },
            {
                "name": "rt_primer",
                "seq": "AAGCAGTGGTATCAACGCAGAGTACAT",
            },
            {
                "name": "spacer",
                "seq": "TTTCTTATATGGG",
            },
        ],
        special_genomic_regions     = null,
        subsample_rate              = null,
        *                           = self.cs_inputs,
    )
}

# Fills in CountInputs from CountInputsMinimal plus default values.
#
# The only call is WRITE_GENE_INDEX, which supplies `gene_index` from the
# reference path in `cs_inputs`.
#
# Set explicitly here:
#   enforce_library_concordance = true
#   trim_polya_min_score        = 20
#   trim_tso_min_score          = 20
#   primers                     = the fixed list below
#                                 (P5, P7, R1, R2, switch_oligo, polyA)
#   count_allowed_chems         = the fixed chemistry-name list below
#   aligner, probe_barcodes_intended_pairing, custom_chemistry_def,
#   genetic_demux_params, throughput, initial_reads, primer_initial_reads,
#   special_genomic_regions, subsample_rate = null
#
# Every output not bound explicitly (e.g. sample_def, chemistry,
# reference_path, cell_calling_config, feature_reference, include_introns,
# force_sample_barcodes, tenx_cmos) is supplied by the `* = self.cs_inputs`
# wildcard binding, i.e. taken from the same-named field of `cs_inputs`.
pipeline FULL_COUNT_INPUTS(
    in  CountInputsMinimal cs_inputs,
    out map[]              sample_def,
    out string             chemistry,
    out ChemistryDef       custom_chemistry_def,
    out path               reference_path,
    out json               gene_index,
    out map[]              primers,
    out CellCalling        cell_calling_config,
    out float              subsample_rate,
    out int                initial_reads,
    out int                primer_initial_reads,
    out string[]           special_genomic_regions,
    out int                r1_length,
    out int                r2_length,
    out int                trim_polya_min_score,
    out int                trim_tso_min_score,
    out bool               no_bam,
    out bool               no_secondary_analysis,
    out bool               no_target_umi_filter,
    out file               feature_reference,
    out bool               include_introns,
    out string             aligner,
    out string             probe_barcodes_intended_pairing,
    out map                genetic_demux_params,
    out bool               enforce_library_concordance,
    out string[]           count_allowed_chems,
    out string             throughput,
    out json               force_sample_barcodes,
    out bool               tenx_cmos,
)
{
    call WRITE_GENE_INDEX(
        reference_path = self.cs_inputs.reference_path,
    )

    return (
        aligner                     = null,
        probe_barcodes_intended_pairing = null,
        custom_chemistry_def        = null,
        enforce_library_concordance = true,
        gene_index                  = WRITE_GENE_INDEX.gene_index,
        genetic_demux_params        = null,
        throughput                  = null,
        initial_reads               = null,
        primer_initial_reads        = null,
        primers                     = [
            {
                "name": "P5",
                "seq": "AATGATACGGCGACCACCGAGATCT",
            },
            {
                "name": "P7",
                "seq": "CAAGCAGAAGACGGCATACGAGAT",
            },
            {
                "name": "R1",
                "seq": "ACACTCTTTCCCTACACGACG",
            },
            {
                "name": "R2",
                "seq": "GTGACTGGAGTTCAGACGTGTG",
            },
            {
                "name": "switch_oligo",
                "seq": "AAGCAGTGGTATCAACGCAGAGTACATGGG",
            },
            {
                "name": "polyA",
                "seq": "AAAAAAAAAAAAAAAAAAAA",
            },
        ],
        special_genomic_regions     = null,
        subsample_rate              = null,
        trim_polya_min_score        = 20,
        trim_tso_min_score          = 20,
        count_allowed_chems         = [
            "auto",
            "custom",
            "threeprime",
            "fiveprime",
            "SC3P_auto",
            "SC5P_auto",
            "SC3Pv1",
            "SC3Pv2",
            "SC3Pv3",
            "SC3Pv3LT",
            "SC5P-PE",
            "SC5P-R1",
            "SC5P-R2",
            "SC-FB",
            "ARC-v1",
        ],
        *                           = self.cs_inputs,
    )
}

# Stage declaration for EMPTY_IF_NULL; the implementation is the Python stage
# code at ../rna/stages/common/empty_if_null.
#
# Takes an array of VdjInputs (`raw`) and emits an array of VdjInputs
# (`not_null`).
# NOTE(review): judging by the stage and output names, this presumably replaces
# a null `raw` with an empty array -- confirm against the stage source.
stage EMPTY_IF_NULL(
    in  VdjInputs[] raw,
    out VdjInputs[] not_null,
    src py          "../rna/stages/common/empty_if_null",
)

# Customer-facing "multi" pipeline.
#
# Inputs:
#   sample_id, sample_desc - forwarded to PARSE_MULTI_CONFIG.
#   config                 - the multi config, passed to both preflight
#                            calls and to PARSE_MULTI_CONFIG.
#   config_hash            - forwarded to PARSE_MULTI_CONFIG.
#
# Outputs:
#   config          - the config file emitted by PARSE_MULTI_CONFIG.
#   vdj_reference   - SC_MULTI_CORE.vdj_ref_out.
#   multi           - struct assembled in the return block from
#                     SC_MULTI_CORE and BUILD_VDJ_OUTPUTS_CS outputs.
#   per_sample_outs - one entry per sample, produced by the mapped
#                     BUILD_SAMPLE_OUTS call.
#
# Every call in this pipeline that takes `is_pd` is given `false`.
pipeline SC_MULTI_CS(
    in  string               sample_id,
    in  string               sample_desc,
    in  FileOrBytes          config,
    in  string               config_hash,
    out csv                  config           "Multi Config CSV",
    out VdjRefFolder         vdj_reference    "V(D)J reference",
    out MultiOutputsCS       multi,
    out map<SampleOutputsCS> per_sample_outs,
)
{
    # MULTI_PREFLIGHT is invoked twice with identical arguments, both marked
    # `preflight = true`. This first invocation is aliased and additionally
    # marked `local = true`.
    call MULTI_PREFLIGHT as MULTI_PREFLIGHT_LOCAL(
        config = self.config,
        is_pd  = false,
    ) using (
        local     = true,
        preflight = true,
    )

    # Second preflight invocation, without the `local` modifier.
    call MULTI_PREFLIGHT(
        config = self.config,
        is_pd  = false,
    ) using (
        preflight = true,
    )

    # Parse the config. Its outputs (count_input, vdj_inputs, vdj_gen_inputs,
    # common_input, basic_config, multi_graph, config_file) feed the rest of
    # the pipeline.
    call PARSE_MULTI_CONFIG(
        sample_id   = self.sample_id,
        sample_desc = self.sample_desc,
        config      = self.config,
        config_hash = self.config_hash,
        params      = null,
        is_pd       = false,
    )

    # Expand the parsed count inputs; disabled when the parsed basic_config
    # has disable_count set.
    call FULL_COUNT_INPUTS(
        cs_inputs = PARSE_MULTI_CONFIG.count_input,
    ) using (
        disabled = PARSE_MULTI_CONFIG.basic_config.disable_count,
    )

    # Mapped call: one FULL_VDJ_INPUTS invocation per element of the parsed
    # vdj_inputs.
    map call FULL_VDJ_INPUTS(
        cs_inputs = split PARSE_MULTI_CONFIG.vdj_inputs,
    )

    # NOTE(review): count_pipestance_type is "SC_RNA_COUNTER_CS" here as well
    # as in the SC_RNA_COUNTER_CS pipeline -- confirm this is intended for
    # the multi pipeline.
    call SC_MULTI_CORE(
        common_input          = PARSE_MULTI_CONFIG.common_input,
        count_input           = FULL_COUNT_INPUTS,
        vdj_inputs            = FULL_VDJ_INPUTS,
        vdj_gen_inputs        = PARSE_MULTI_CONFIG.vdj_gen_inputs,
        basic_config          = PARSE_MULTI_CONFIG.basic_config,
        multi_graph           = PARSE_MULTI_CONFIG.multi_graph,
        multi_config          = PARSE_MULTI_CONFIG.config_file,
        is_pd                 = false,
        count_allowed_chems   = FULL_COUNT_INPUTS.count_allowed_chems,
        count_pipestance_type = "SC_RNA_COUNTER_CS",
    )

    # Derive the vdj_b / vdj_t output structs from the core pipeline outputs.
    call BUILD_VDJ_OUTPUTS_CS(
        multi_core = SC_MULTI_CORE,
    )

    # The outputs of this call (crispr_analysis, rna_analysis, cloupe_file)
    # are what the mapped BUILD_SAMPLE_OUTS call below splits over. The two
    # plot inputs are not supplied (null).
    call SANITIZE_MAP_CALLS(
        in_crispr_analysis    = SC_MULTI_CORE.sample_analyzer.crispr_analyzer.crispr_analysis,
        in_rna_analysis       = SC_MULTI_CORE.sample_analyzer.rna_analyzer.analysis_csv,
        in_cloupe_file        = SC_MULTI_CORE.sample_reporter.cloupe,
        in_metrics_summary    = SC_MULTI_CORE.sample_reporter.metrics_summary,
        in_sample_tsne_plots  = null,
        in_barcode_rank_plots = null,
    )

    # Mapped call over the per-sample collections. The vdj_b_outs and
    # vdj_t_outs bindings are not split, so every invocation receives the
    # same values for them.
    map call BUILD_SAMPLE_OUTS(
        sample_slfe_outs    = split SC_MULTI_CORE.sample_outs,
        crispr_analysis     = split SANITIZE_MAP_CALLS.crispr_analysis,
        rna_analysis        = split SANITIZE_MAP_CALLS.rna_analysis,
        cloupe              = split SANITIZE_MAP_CALLS.cloupe_file,
        web_summary         = split SC_MULTI_CORE.multi_web_summaries,
        metrics_summary_csv = split SC_MULTI_CORE.multi_metrics_csvs,
        vdj_b_outs          = BUILD_VDJ_OUTPUTS_CS.vdj_b_outs_cs,
        vdj_t_outs          = BUILD_VDJ_OUTPUTS_CS.vdj_t_outs_cs,
    )

    # The `multi` struct is built inline: `count` gathers library-level count
    # outputs from SC_MULTI_CORE, and vdj_b / vdj_t reuse the same
    # BUILD_VDJ_OUTPUTS_CS values passed to BUILD_SAMPLE_OUTS above.
    return (
        config          = PARSE_MULTI_CONFIG.config_file,
        vdj_reference   = SC_MULTI_CORE.vdj_ref_out,
        multi           = {
            count: {
                feature_reference_csv:       SC_MULTI_CORE.multi_reporter.count_summary.feature_reference,
                raw_cloupe:                  SC_MULTI_CORE.multi_reporter.cloupe,
                raw_feature_bc_matrix_h5:    SC_MULTI_CORE.multi_gw.count.basic_counter_outs.raw_gene_bc_matrices_h5,
                raw_feature_bc_matrix_mex:   SC_MULTI_CORE.multi_gw.count.basic_counter_outs.raw_gene_bc_matrices_mex,
                raw_molecule_info_h5:        SC_MULTI_CORE.multi_gw.count.basic_counter_outs.molecule_info,
                unassigned_alignments:       SC_MULTI_CORE.unassigned_bam,
                unassigned_alignments_index: SC_MULTI_CORE.unassigned_bai,
            },
            multiplexing_analysis: SC_MULTI_CORE.multi_gw.count.basic_counter_outs.assign_tags,
            vdj_b:                 BUILD_VDJ_OUTPUTS_CS.vdj_b_outs_cs,
            vdj_t:                 BUILD_VDJ_OUTPUTS_CS.vdj_t_outs_cs,
        },
        per_sample_outs = BUILD_SAMPLE_OUTS.sample_outs,
    )
}

#
# @include "rna/sc_rna_counter_cs.mro"
#

# Pass-through pipeline: each of the three inputs is returned unchanged as
# the output of the same name and type. It contains no calls.
# SC_RNA_COUNTER_CS invokes it with inline struct literals and then reads
# the results back as `_STRUCTIFY.<field>`.
# NOTE(review): presumably this exists so the literals become typed
# CommonInputs / CountInputsCS / BasicPipelineConfig values that can be
# referenced by later calls -- confirm.
pipeline _STRUCTIFY(
    in  CommonInputs        common_input,
    in  CountInputsCS       count_input,
    in  BasicPipelineConfig config,
    out CommonInputs        common_input,
    out CountInputsCS       count_input,
    out BasicPipelineConfig config,
)
{
    return (
        common_input = self.common_input,
        count_input  = self.count_input,
        config       = self.config,
    )
}

# Customer-facing single-library count pipeline.
#
# The flat inputs declared here are packed into struct values via
# _STRUCTIFY, expanded by FULL_COUNT_INPUTS, and run through SC_MULTI_CORE
# with the V(D)J and multi features switched off (see the `config` literal
# and the SC_MULTI_CORE bindings below). Every output is wired directly from
# an SC_MULTI_CORE output in the return block.
#
# Each `out` line carries a quoted display description; where a second
# quoted string is present it gives the output file name.
pipeline SC_RNA_COUNTER_CS(
    in  string  sample_id,
    in  map[]   sample_def,
    in  string  sample_desc,
    in  path    reference_path,
    in  int     recovered_cells,
    in  bool    no_bam,
    in  bool    no_secondary_analysis,
    in  bool    no_target_umi_filter,
    in  int     force_cells,
    in  bool    include_introns,
    in  string  chemistry,
    in  int     r1_length,
    in  int     r2_length,
    in  int     trim_polya_min_score,
    in  int     trim_tso_min_score,
    in  csv     feature_reference,
    out html    web_summary                    "Run summary HTML",
    out csv     metrics_summary                "Run summary CSV",
    out bam     possorted_genome_bam           "BAM"                       "possorted_genome_bam.bam",
    out bam.bai possorted_genome_bam_index     "BAM index"                 "possorted_genome_bam.bam.bai",
    out path    filtered_feature_bc_matrix     "Filtered feature-barcode matrices MEX",
    out h5      filtered_feature_bc_matrix_h5  "Filtered feature-barcode matrices HDF5"  "filtered_feature_bc_matrix.h5",
    out path    raw_feature_bc_matrix          "Unfiltered feature-barcode matrices MEX",
    out h5      raw_feature_bc_matrix_h5       "Unfiltered feature-barcode matrices HDF5"  "raw_feature_bc_matrix.h5",
    out path    analysis                       "Secondary analysis output CSV",
    out h5      molecule_info                  "Per-molecule read information",
    out path    crispr_analysis                "CRISPR-specific analysis",
    out cloupe  cloupe                         "Loupe Browser file",
    out csv     feature_reference              "Feature Reference",
    out csv     target_panel                   "Target Panel File",
)
{
    # CELLRANGER_PREFLIGHT is invoked twice, both marked `preflight = true`.
    # This first invocation is aliased, marked `local = true`, and passes
    # full_check = false.
    call CELLRANGER_PREFLIGHT as CELLRANGER_PREFLIGHT_LOCAL(
        sample_def        = self.sample_def,
        reference_path    = self.reference_path,
        feature_reference = self.feature_reference,
        full_check        = false,
        recovered_cells   = self.recovered_cells,
        force_cells       = self.force_cells,
        r1_length         = self.r1_length,
        r2_length         = self.r2_length,
    ) using (
        local     = true,
        preflight = true,
    )

    # Second preflight invocation: same arguments except full_check = true,
    # and without the `local` modifier.
    call CELLRANGER_PREFLIGHT(
        sample_def        = self.sample_def,
        reference_path    = self.reference_path,
        feature_reference = self.feature_reference,
        full_check        = true,
        recovered_cells   = self.recovered_cells,
        force_cells       = self.force_cells,
        r1_length         = self.r1_length,
        r2_length         = self.r2_length,
    ) using (
        preflight = true,
    )

    # Produces the gene_index consumed by the count_input literal below.
    call WRITE_GENE_INDEX(
        reference_path = self.reference_path,
    )

    # Pack the flat pipeline arguments into struct values. Fields that have
    # no corresponding pipeline input are given fixed values (null or false).
    # The `config` literal enables only counting: disable_count is false
    # while disable_multi, disable_multi_count and disable_vdj are true.
    call _STRUCTIFY(
        common_input = {
            multi_config_sha: null,
            sample_desc:      self.sample_desc,
            sample_id:        self.sample_id,
        },
        count_input  = {
            aligner:               null,
            cell_calling_config: {
                cell_barcodes:                  null,
                disable_ab_aggregate_detection: false,
                force_cells:                    self.force_cells,
                override_library_types:         null,
                override_mode:                  null,
                recovered_cells:                self.recovered_cells,
            },
            chemistry:             self.chemistry,
            feature_reference:     self.feature_reference,
            force_sample_barcodes: null,
            gene_index:            WRITE_GENE_INDEX.gene_index,
            include_introns:       self.include_introns,
            no_bam:                self.no_bam,
            no_secondary_analysis: self.no_secondary_analysis,
            no_target_umi_filter:  self.no_target_umi_filter,
            r1_length:             self.r1_length,
            r2_length:             self.r2_length,
            reference_path:        self.reference_path,
            sample_def:            self.sample_def,
            tenx_cmos:             null,
            trim_polya_min_score:  self.trim_polya_min_score,
            trim_tso_min_score:    self.trim_tso_min_score,
        },
        config       = {
            disable_count:       false,
            disable_multi:       true,
            disable_multi_count: true,
            disable_vdj:         true,
        },
    )

    # The `disabled` modifier reads disable_count from the config literal
    # above, which is the constant false.
    call FULL_COUNT_INPUTS(
        cs_inputs = _STRUCTIFY.count_input,
    ) using (
        disabled = _STRUCTIFY.config.disable_count,
    )

    # Run the shared core with no V(D)J inputs (empty array) and with
    # vdj_gen_inputs, multi_graph and multi_config all null.
    call SC_MULTI_CORE(
        common_input          = _STRUCTIFY.common_input,
        count_input           = FULL_COUNT_INPUTS,
        vdj_inputs            = [],
        vdj_gen_inputs        = null,
        basic_config          = _STRUCTIFY.config,
        multi_graph           = null,
        multi_config          = null,
        is_pd                 = false,
        count_allowed_chems   = FULL_COUNT_INPUTS.count_allowed_chems,
        count_pipestance_type = "SC_RNA_COUNTER_CS",
    )

    # Outputs named *_feature_bc_matrix* here are bound to core outputs
    # named *_gene_bc_matrices*.
    return (
        analysis                      = SC_MULTI_CORE.count_analyzer.rna_analyzer.analysis_csv,
        cloupe                        = SC_MULTI_CORE.multi_reporter.cloupe,
        crispr_analysis               = SC_MULTI_CORE.count_analyzer.crispr_analyzer.crispr_analysis,
        feature_reference             = SC_MULTI_CORE.multi_reporter.count_summary.feature_reference,
        filtered_feature_bc_matrix    = SC_MULTI_CORE.multi_gw.count.basic_counter_outs.filtered_gene_bc_matrices_mex,
        filtered_feature_bc_matrix_h5 = SC_MULTI_CORE.multi_gw.count.basic_counter_outs.filtered_gene_bc_matrices_h5,
        metrics_summary               = SC_MULTI_CORE.multi_reporter.count_summary.metrics_summary_csv,
        molecule_info                 = SC_MULTI_CORE.multi_gw.count.basic_counter_outs.molecule_info,
        possorted_genome_bam          = SC_MULTI_CORE.multi_gw.count.basic_counter_outs.possorted_genome_bam,
        possorted_genome_bam_index    = SC_MULTI_CORE.multi_gw.count.basic_counter_outs.possorted_genome_bam_index,
        raw_feature_bc_matrix         = SC_MULTI_CORE.multi_gw.count.basic_counter_outs.raw_gene_bc_matrices_mex,
        raw_feature_bc_matrix_h5      = SC_MULTI_CORE.multi_gw.count.basic_counter_outs.raw_gene_bc_matrices_h5,
        target_panel                  = SC_MULTI_CORE.multi_gw.count.target_outs.target_panel,
        web_summary                   = SC_MULTI_CORE.multi_reporter.count_summary.web_summary,
    )
}

#
# @include "__count-PAXUGA_Relapse.mro"
#

# Top-level invocation of SC_RNA_COUNTER_CS for sample id
# "count-PAXUGA_Relapse".
# It supplies a single sample_def entry (library type "Gene Expression",
# sample name "PAXUGA_Relapse"), chemistry "auto", and a value of 20 for both
# trim scores. recovered_cells, force_cells, r1_length, r2_length and
# feature_reference are left null; no_bam, no_secondary_analysis,
# no_target_umi_filter and include_introns are all false.
call SC_RNA_COUNTER_CS(
    sample_id             = "count-PAXUGA_Relapse",
    sample_def            = [
        {
            "fastq_id": null,
            "fastq_mode": "ILMN_BCL2FASTQ",
            "gem_group": null,
            "lanes": null,
            "library_type": "Gene Expression",
            "read_path": "/gpfs/data/sequence/results/aifantislab/2021-runs/2021-09-27/fastq",
            "sample_indices": ["any"],
            "sample_names": ["PAXUGA_Relapse"],
            "subsample_rate": null,
            "target_set": null,
            "target_set_name": null,
        },
    ],
    sample_desc           = "",
    reference_path        = "/gpfs/data/sequence/cellranger-refdata/refdata-gex-GRCh38-2020-A",
    recovered_cells       = null,
    no_bam                = false,
    no_secondary_analysis = false,
    no_target_umi_filter  = false,
    force_cells           = null,
    include_introns       = false,
    chemistry             = "auto",
    r1_length             = null,
    r2_length             = null,
    trim_polya_min_score  = 20,
    trim_tso_min_score    = 20,
    feature_reference     = null,
)
