#!/bin/bash
# Aliases are only expanded in a non-interactive shell when this option is on.
shopt -s expand_aliases
# NOTE(review): the next two lines contain typographic quotes (” ’) and «,
# none of which bash treats as quoting or as the here-document operator.
# They look like a mangled `alias ~~~=":<<'~~~bash'"` / `:<<'~~~bash'` pair,
# which would turn the documentation below into a no-op here-document that
# ends at a `~~~bash` line. Confirm against the upstream file before relying
# on this script; as written the documentation lines are not skipped.
alias ~=”:«’~bash’”
:«’~~~bash’
Usage
$ getSxPlasmidFileRef.sh \
plasmid_file \
genome \
bowtie2index \
[ext1up ext1down ext2up ext2down]
---
title: getSxPlasmidFileRef
---
flowchart TD
PF[(
plasmid_file
| adapter#0040;20bp#0041; + sgRNA#0040;20bp#0041; + scaffold#0040;83/93bp#0041; + query#0040;44bp#0041; + 3bp + RCbarcode#0040;18bp#0041; + RCprimer#0040;21bp#0041; |
|---|
| ... |
genome_file
)] --> GSPFR BI[(bowtie2index
)] --> GSPFR EXT[(extensions
| ext1up | ext1down | ext2up | ext2down |
stdout
| start1 | ref1 | end1 | start2 | ref2 | end2 |
|---|---|---|---|---|---|
| ... | ... | ... | ... | ... | ... |
- Extract references from the in-house `plasmid_file` and output to `stdout`.
- For `NGG` `plasmid_file`, the 44bp `query` can be perfectly mapped to the genome.
- For `NAA` `plasmid_file`, positions 17~18 of `query` are `TT`, which should be replaced by `CC` in order to map to the genome.
- The actual cut site is inferred from the mapping result.
- `ref1` consists of `ext1up` bases upstream of the cut site and `ext1down` bases downstream of the cut site. `ref2` consists of `ext2up` bases upstream of the cut site and `ext2down` bases downstream of the cut site.
- Note that for `NAA` `plasmid_file`, the retrieved reference needs `GG` (`query` and reference are on opposite strands, so `CC` becomes `GG`) replaced back by `AA`.
Source
# Positional arguments. The three paths are required; the four reference
# extensions (in bases around the cut site) are optional and fall back to the
# defaults below when unset or empty.
plasmid_file=$1
genome=$2
bowtie2index=$3
ext1up=${4:-50}
ext1down=${5:-0}
ext2up=${6:-10}
ext2down=${7:-100}

# Single character located six positions from the end of the plasmid file
# name; handed to getSxRefFile.pl as its third argument.
# NOTE(review): presumably tells NGG and NAA plasmid files apart — confirm
# against the file naming convention.
plasmid_tag=${plasmid_file: -6:1}

# Pipeline, one stage per line:
#   1. getSxPlasmidFileTarget.pl reads the plasmid file and feeds bowtie2.
#   2. bowtie2 aligns raw reads (-r) from stdin (-U -) against the index.
#      Its stderr is discarded on purpose by the original author.
#   3. samtools view converts the alignments to headerless SAM text.
#   4. sxTargetSam2Bed.awk receives the four extensions as arguments.
#   5. bedtools getfasta pulls strand-aware (-s) sequences from the genome.
#   6. sed '1~2d' drops every odd-numbered line (GNU sed first~step address).
#   7. getSxRefFile.pl produces the final output on stdout.
# Every expansion is quoted so that a value containing whitespace or glob
# characters reaches its tool as exactly one argument.
getSxPlasmidFileTarget.pl "${plasmid_file}" \
  | bowtie2 --quiet --mm -x "${bowtie2index}" -r -U - 2> /dev/null \
  | samtools view \
  | gawk -f sxTargetSam2Bed.awk -- "${ext1up}" "${ext1down}" "${ext2up}" "${ext2down}" \
  | bedtools getfasta -s -fi "${genome}" -bed - \
  | sed '1~2d' \
  | getSxRefFile.pl "${ext1up}" "${ext2up}" "${plasmid_tag}"
alias ~~~=":" # This suppresses a warning and is not part of source.