#!/bin/bash

shopt -s expand_aliases

alias ~=”:«’~bash’”

:«’~~~bash’

Usage

$ markerIndices=marker1,marker2,... \
  minScores=score1,score2,... \
  demultiplex.sh \
    removeDuplicates_file
---
title: demultiplex.sh
---
flowchart TD
    UNIQUE[(
        

removeDuplicates_file

R1 R2 ... #
... ... ... ...
)] UNIQUE --> DM[demultiplex.sh] MK1[(marker1)] --> DM MK2[(marker2)] --> DM MKN[(...)] --> DM SCORE[(

minScores

score1 score2 ...
... ... ...
)] SCORE --> DM DM --> ONTARGET[(

stdout

R1 R2 ... # id rstart1 rend1 qstart1 qend1 rstart2 rend2 qstart2 qend2 ...
... ... ... ... ... ... ... ... ... ... ... ... ... ...
)]

Source

mapMarker()
{
    # Usage: mapMarker minScore markerIndex <reads
    # Map reads to marker by bowtie2
    # Input: plain reads
    # Output: sam file without header
    minScore=$1
    markerIndex=$2
    bowtie2 --quiet --mm --norc --local -L 15 --ma 1 --mp 2,2 --rdg 3,1 --rfg 3,1 --score-min C,${minScore} -r -x "${markerIndex}" -U - 2>/dev/null | samtools view
}

filterMarkers()
{
    # Input: seq1|seq2|...|count|flag1|refId1|markerStart1|markerEnd1|seqStart1|seqEnd1|flag2|refId2|markerStart2|markerEnd2|seqStart2|seqEnd2|...
    # Output: seq1|seq2|...|count|refId|markerStart1|markerEnd1|seqStart1|seqEnd1|markerStart2|markerEnd2|seqStart2|seqEnd2|...
    # filter out rows with one of the following happens:
    # 1. seqN failed to align markerIndexN
    # 2. refIdM != refIdN for some M and N
    gawk -F "\t" -v OFS="\t" -v firstFlagPos=$((${#markerIndexArray[@]}+2)) '
        {
            refId = $(firstFlagPos+1)
            for (i = firstFlagPos; i <= NF; i += 6) {
                if ($i != 0 || $(i+1) != refId) {
                    next
                }
            }
            for (i = 1; i < firstFlagPos; ++i) {
                printf("%s\t", $i)
            }
            printf("%s\t", $(firstFlagPos+1))
            for (i = firstFlagPos; i < NF; ++i) {
                if ((i - firstFlagPos) % 6 <= 1) {
                    continue
                }
                printf("%s\t", $i)
            }
            printf("%s\n", $NF)
        }
    '
}

inputFile=$1

IFS=',' read -r -a markerIndexArray <<< "${markerIndices}"
IFS=',' read -r -a minScoreArray <<< "$minScores"
maps=""
for ii in ${!markerIndexArray[@]}
do
    maps="${maps} <(cut -f$((ii+1)) ${inputFile} | mapMarker ${minScoreArray[$ii]} ${markerIndexArray[$ii]} | gawk -f getAlignPos.awk)"
done

eval paste "${inputFile}" $maps | filterMarkers
alias ~~~=":" # This suppresses a warning and is not part of source.