Skip to content

Commit

Permalink
lint
Browse files Browse the repository at this point in the history
  • Loading branch information
ftabaro committed Jul 18, 2023
1 parent e011ff8 commit 43a8618
Show file tree
Hide file tree
Showing 14 changed files with 432 additions and 420 deletions.
4 changes: 4 additions & 0 deletions env/pandas.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
channels:
- anaconda
dependencies:
- pandas
137 changes: 9 additions & 128 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,129 +29,11 @@ container: "docker://condaforge/mambaforge:latest"

configfile: "config/config.yaml"

#############################
## INCLUDE COMMON FUNCTIONS
#############################

############
## HELPERS
############


def giga_to_byte(g):
    """Convert a memory budget in gibibytes to bytes, keeping 2 GiB in reserve.

    Two gibibytes are subtracted from ``g`` to leave some memory to other
    processes. Budgets of 2 GiB or less yield ``0`` instead of a negative
    byte count.

    Args:
        g: Memory budget in gibibytes.

    Returns:
        The usable budget in bytes (never negative).
    """
    # Leave some memory to other processes, but never go below zero:
    # a negative number of bytes is meaningless to any consumer.
    g = max(g - 2, 0)
    return g * (1024**3)


def get_filename(link, decompress=False, stem=False):
    """Return the file name component of a URL or filesystem path.

    Args:
        link: A URL string, a path string, or a ``Path`` object.
        decompress: When true, drop a trailing ``.gz`` from the result.
        stem: When true, return the name without its last suffix
            (``Path.stem``) instead of the full name (``Path.name``).

    Returns:
        The extracted file name as a ``str``.
    """
    # Sometimes this function is used on paths instead of links;
    # urlparse needs a str, so convert Path objects back.
    if isinstance(link, Path):
        link = str(link)

    # Parse, then let pathlib pick the last component of the URL path.
    url_path = Path(urlparse(link).path)
    basename = url_path.stem if stem else url_path.name

    # Remove only the trailing ".gz". str.replace would also strip any
    # ".gz" occurring earlier in the name (e.g. "a.gz.b.gz" -> "a.b").
    if decompress and basename.endswith(".gz"):
        basename = basename[: -len(".gz")]

    return basename


def get_samples(wildcards, samples):
    """Return the samples registered for the serie in ``wildcards.serie``.

    The serie is looked up under ``samples["single"]`` first; if it is not
    there, ``samples["paired"]`` is used (a ``KeyError`` propagates when the
    serie is in neither).
    """
    layout = "single" if wildcards.serie in samples["single"] else "paired"
    return samples[layout][wildcards.serie]

def get_bw(wildcards):
    """Builds bigwig paths for rule all.

    For every library in ``library_names_single + library_names_paired`` the
    pattern ``star_folder/{serie}/{sample}.bw`` is expanded over the samples
    of that library, and all expansions are concatenated into one list.
    """
    bigwig_pattern = star_folder.joinpath("{serie}", "{sample}.bw")
    bigwigs = []
    for library in library_names_single + library_names_paired:
        # Libraries not listed as single-end are looked up as paired-end.
        layout = "single" if library in samples["single"].keys() else "paired"
        bigwigs.extend(
            expand(bigwig_pattern, serie=library, sample=samples[layout][library])
        )
    return bigwigs


def get_star_input(wildcards):
    """Builds input paths for STAR alignment, for single-end or paired-end libraries.

    Each extension in ``supported_extensions`` is tried in order under
    ``trim_reads_folder/<serie>``. Single-end libraries yield one path
    (``<sample>.<ext>``); all other libraries yield a two-item list
    (``<sample>_1.<ext>``, ``<sample>_2.<ext>``). The first candidate whose
    file(s) exist is returned; if none exist, the candidate built from the
    last extension is returned as-is.
    """
    single_end = wildcards.serie in library_names_single
    for ext in supported_extensions:
        if single_end:
            infile = trim_reads_folder.joinpath(
                wildcards.serie, f"{wildcards.sample}.{ext}"
            )
            found = os.path.exists(infile)
        else:
            infile = [
                trim_reads_folder.joinpath(
                    wildcards.serie, f"{wildcards.sample}_{mate}.{ext}"
                )
                for mate in (1, 2)
            ]
            # Both mates must be present for this extension to count.
            found = all(list(map(os.path.exists, infile)))
        if found:
            break
    # Deliberately no error when nothing matched: the last candidate is kept.
    return infile


def get_params(wildcards, key):
    """Returns the value of a specific key for the current serie.

    Scans ``config["sequencing_libraries"]`` for entries whose ``"name"``
    equals ``wildcards.serie``. The value from the last matching entry wins;
    an empty string is returned when no entry matches.
    """
    matches = [
        lib[key]
        for lib in config["sequencing_libraries"]
        if lib["name"] == wildcards.serie
    ]
    return matches[-1] if matches else ""


def get_sample_sheet(wildcards):
    """Returns the ``sample_sheet`` value configured for the current serie."""
    sheet = get_params(wildcards, key="sample_sheet")
    return sheet


def get_fastq(wildcards):
    """Locate the raw FastQ file of a single-end sample.

    Returns an empty string when ``wildcards.serie`` is not a single-end
    library. Otherwise each extension in ``supported_extensions`` is tried
    under ``raw_reads_folder/<serie>`` and the first existing
    ``<sample>.<ext>`` path is returned.

    Raises:
        ValueError: if no candidate file exists for a single-end library.
    """
    if wildcards.serie not in library_names_single:
        return ""

    for ext in supported_extensions:
        fastq = raw_reads_folder.joinpath(
            wildcards.serie, f"{wildcards.sample}.{ext}"
        )
        if os.path.exists(fastq):
            return fastq

    raise ValueError(
        f"Could not find FastQ file. Check your naming. Supported extensions: {supported_extensions}"
    )


def get_fastq_paired(wildcards):
    """Locate the two raw FastQ files of a paired-end sample.

    Returns an empty string when ``wildcards.serie`` is not a paired-end
    library. Otherwise every combination of extension (outer loop) and mate
    suffix pair (inner loop) is tried under ``raw_reads_folder/<serie>``; the
    first combination for which both files exist is returned as
    ``{"m1": <first mate>, "m2": <second mate>}``.

    Raises:
        ValueError: if no combination yields two existing files.
    """
    if wildcards.serie not in library_names_paired:
        return ""

    def mate_path(mate_suffix, ext):
        # Path of one mate: <raw_reads_folder>/<serie>/<sample><suffix>.<ext>
        return raw_reads_folder.joinpath(
            wildcards.serie, f"{wildcards.sample}{mate_suffix}.{ext}"
        )

    for ext in supported_extensions:
        for suffix in supported_suffixes:
            first_mate = mate_path(suffix[0], ext)
            second_mate = mate_path(suffix[1], ext)
            if os.path.exists(first_mate) and os.path.exists(second_mate):
                return {"m1": first_mate, "m2": second_mate}

    raise ValueError(
        f"Could not find FastQ files. Check your naming.\nPaired-end suffixed: {supported_suffixes}.\nSupported extensions: {supported_extensions}"
    )


def mkdir(p: Path, verbose=False):
    """Create directory ``p`` (and any missing parents) if it does not exist.

    Args:
        p: Directory to create.
        verbose: When true, print a message after the directory is created.
    """
    if p.exists():
        return
    p.mkdir(parents=True, exist_ok=True)
    if verbose:
        print("Created {}".format(p))

include: "include/common.smk"

#######################
## DEFINE VARIABLES
Expand Down Expand Up @@ -208,9 +90,6 @@ gtf_path = references_folder.joinpath(

rmsk_path = Path(config["genome"]["rmsk_path"])
config["genome"]["rmsk_link"] = None
# rmsk_path = references_folder.joinpath(
# get_filename(config["genome"]["rmsk_link"], decompress=False)
# )
rmsk_bed = Path(str(rmsk_path).replace("gtf", "bed"))

gaf_path = references_folder.joinpath(
Expand Down Expand Up @@ -284,9 +163,10 @@ onstart:

print_json(json.dumps(samples))

############
## RULES
############

#############
## WORKFLOW
#############


wildcard_constraints:
Expand All @@ -295,6 +175,7 @@ wildcard_constraints:
method="multihit|random",



include: "include/download-references.smk"
include: "include/fastqc.smk"
include: "include/trim_single.smk"
Expand Down
Loading

0 comments on commit 43a8618

Please sign in to comment.