vep-cache: new package (#44523)

* py-uvloop: add v3.8.14, v3.9.15, v3.10.3 and dependencies

* rollback

* vep: add v110,v111,v112

* vep-cache: add v110,v111,v112

* Cleanup

* Reorganizigng

Signed-off-by: Teague Sterling <teaguesterling@gmail.com>

* Update package.py

* Update package.py

* [@spackbot] updating style on behalf of teaguesterling

* Update package.py

* Update package.py

* Update package.py

* [@spackbot] updating style on behalf of teaguesterling

* Update package.py

* [@spackbot] updating style on behalf of teaguesterling

* Fix scoping and syntax issues

Signed-off-by: Teague Sterling <teaguesterling@gmail.com>

* fix styles

Signed-off-by: Teague Sterling <teaguesterling@gmail.com>

* fix variants

* fixing up variant issues and cleaning up resource code

Signed-off-by: Teague Sterling <teaguesterling@gmail.com>

* fixing unused imports

Signed-off-by: Teague Sterling <teaguesterling@gmail.com>

* Apply suggestions from code review

Co-authored-by: Arne Becker <101113822+EbiArnie@users.noreply.github.com>

* fixing vep dependencies

Signed-off-by: Teague Sterling <teaguesterling@gmail.com>

* Fixing resources

Signed-off-by: Teague Sterling <teaguesterling@gmail.com>

* Fixing issue where resources are not downloaded

Signed-off-by: Teague Sterling <teaguesterling@gmail.com>

* vep-cache fixing downloads

Signed-off-by: Teague Sterling <teaguesterling@gmail.com>

* defaulting to using VEP installer

Signed-off-by: Teague Sterling <teaguesterling@gmail.com>

* Removing resource-based cache installation and simplifying package. Resources without checksums doesn't work (anymore?) and calculating them with be difficult

Signed-off-by: Teague Sterling <teaguesterling@gmail.com>

---------

Signed-off-by: Teague Sterling <teaguesterling@gmail.com>
Co-authored-by: Arne Becker <101113822+EbiArnie@users.noreply.github.com>
This commit is contained in:
Teague Sterling 2024-11-04 03:37:21 -08:00 committed by GitHub
parent c9ed91758d
commit 10f7014add
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -0,0 +1,151 @@
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
from spack.package import *
class VepCache(Package):
"""Separate installation and management for the Ensembl Variant Effect Predictor (vep)"""
homepage = "https://useast.ensembl.org/info/docs/tools/vep/index.html"
maintainers("teaguesterling")
# This is a dummy value to get spack to resolve resources, which are not downloaded
# when has_code = False
has_code = False
license("Apache-2.0", checked_by="teaguesterling")
vep_versions = ["112", "111", "110"]
depends_on("vep", type="build")
for major in vep_versions:
version(major)
depends_on(f"vep@{major}", type="build", when=f"@{major}+match_vep_version")
vep_assembly_sources = ["ensembl", "refseq", "merged"]
# This is an incomplete list
vep_species = {
"bos_taurus": ["UMD3.1"],
"danio_rerio": ["GRCz11"],
"homo_sapiens": ["GRCh38", "GRCh37"],
"mus_musculus": ["GRCm38"],
"rattus_norvegicus": ["Rnor_6.0"],
}
variant("match_vep_version", default=True, description="Match cache and software version")
variant("env", default=True, description="Setup VEP environment variables for this cache")
# Cache configuration options
variant("fasta", default=True, description="Add FASTA files to the cache")
variant("indexed", default=True, description="Use indexed cache")
variant(
"assembly_source",
values=vep_assembly_sources,
default="ensembl",
description="What reference genome source",
)
variant(
"species",
values=vep_species.keys(),
default="homo_sapiens",
description="Which species to download the cache for (only one at a time)",
)
variant(
"assembly",
values=["latest"]
+ [
conditional(*assemblies, when=f"species={species}")
for species, assemblies in vep_species.items()
],
default="latest",
multi=False,
description="Which assembly of genome to use (only needed for homo sapiens)",
)
def cache_from_spec(self, spec):
variants = spec.variants
indexed = spec.satisfies("+indexed")
cache_type = variants["assembly_source"].value
species = variants["species"].value
assembly = variants["assembly"].value
assembly = self.vep_species[species][0] if assembly == "latest" else assembly
return indexed, cache_type, species, assembly
def vep_cache_config(self, base):
spec = self.spec
cache_version = spec.version.up_to(1)
indexed, cache_type, species, assembly = self.cache_from_spec(spec)
user_root = join_path(base, "share", "vep")
root = user_root # Should this be VEP install dir?
suffix = "" if cache_type == "ensembl" else f"_{cache_type}"
species_cache = f"{species}{suffix}"
if species == "homo_sapiens":
cache_dir = join_path(species, f"{cache_version}_{assembly}")
else:
cache_dir = join_path(species, f"{cache_version}")
return {
"root": root,
"user_root": user_root,
"version": f"{cache_version}",
"type": f"{cache_type}",
"species": species,
"cache_species": species_cache,
"assembly": f"{assembly}",
"indexed": indexed,
"dir": cache_dir,
"full_path": join_path(root, cache_dir),
}
def setup_run_environment(self, env):
if self.spec.satisfies("+env"):
cache = self.vep_cache_config(self.home)
env.set("VEP_OFFLINE", "1")
env.set("VEP_CACHE", "1")
env.set("VEP_DIR", cache["user_root"])
env.set("VEP_SPECIES", cache["species"])
env.set("VEP_CACHE_VERSION", cache["version"])
if cache["assembly"] is not None:
env.set("VEP_ASSEMBLY", cache["assembly"])
if cache["type"] == "refseq":
env.set("VEP_REFSEQ", "1")
if cache["type"] == "merged":
env.set("VEP_MERGED", "1")
if self.spec.satisfies("+fasta"):
pass
def cache_installer_args(self):
cache = self.vep_cache_config(self.prefix)
args = [
"--CACHEDIR",
cache["full_path"],
"--CACHE_VERSION",
cache["version"],
"--SPECIES",
cache["cache_species"],
]
if cache["species"] == "homo_sapiens":
args += ["--ASSEMBLY", cache["assembly"]]
return args
def installer_args(self):
auto = "cf" if self.spec.satisfies("+fasta") else "c"
args = ["--AUTO", auto, "--NO_UPDATE", "--NO_TEST"]
args += self.cache_installer_args()
return args
def install_with_installer(self):
vep = self.spec["vep"].package
installer = which(vep.vep_installer_path)
installer(*self.installer_args())
def install(self, spec, prefix):
cache = self.vep_cache_config(self.prefix)
mkdirp(cache["full_path"])
self.install_with_installer()