#!/bin/bash

shopt -s expand_aliases

alias ~=”:«’~bash’”

:«’~~~bash’

Usage

$ sxCutR2AdapterFilterCumulate.sh \
    demultiplex_file \
    minToMapShear
---
title: sxCutR2AdapterFilterCumulate.sh
---
flowchart TD
    ONTARGET[(
        

demultiplex_file

R2 R1 # id rstart2 rend2 qstart2 qend2 rstart1 rend1 qstart1 qend1 ...
... ... ... ... ... ... ... ... ... ... ... ... ... ...
)] --> sxCRAFC[sxCutR2AdapterFilterCumulate.sh] sxCRAFC --> QUERY[(

stdout

query # id
... ... ...
)]

Source

cutadaptPlain()
{
    # Usage: cutadaptPlain <plainseq 3'adapter
    # cutadapt does not accept plainseq. This function transform plainseq to fasta before feed to cutadapt, and then transform the fasta output back to plainseq
    # Input: plainseq
    # Output: 3' trimmed plainseq
    sed '=' | sed '1~2s/^/>s/' | cutadapt -a $1 - 2> /dev/null | sed '1~2d'
}

rmDupFile=$1
minToMapShear=$2
cut -f1 $rmDupFile | cutadaptPlain GCACCGACTCGGTGCCACTTTTTCAAGTTGATAACGGACTAGCCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC | paste - <(cut -f3-4,8 $rmDupFile) | gawk -F "\t" -v OFS="\t" -v minToMapShear=$minToMapShear '
{
    if ($4 + 3 + minToMapShear <= length($1)) {
        print substr($1, $4 + 4), $2, $3
    }
}' | gawk -f sxCumulateToMapCutAdaptMarker.awk
alias ~~~=":" # This suppresses a warning and is not part of source.