Skip to content

Commit ce6f8d5

Browse files
committed
app: fix sonar issues in BuildCommandExecutor, #TASK-5564
1 parent cd367b9 commit ce6f8d5

File tree

1 file changed

+27
-93
lines changed

1 file changed

+27
-93
lines changed

cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java

+27-93
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
import org.opencb.cellbase.lib.builders.*;
3535
import org.opencb.cellbase.lib.builders.clinical.variant.ClinicalVariantBuilder;
3636

37-
import java.io.File;
3837
import java.io.IOException;
3938
import java.nio.file.Files;
4039
import java.nio.file.Path;
@@ -60,11 +59,8 @@ public class BuildCommandExecutor extends CommandExecutor {
6059
private boolean normalize = true;
6160

6261
private SpeciesConfiguration.Assembly assembly;
63-
private String ensemblVersion;
6462
private String ensemblRelease;
6563

66-
private File ensemblScriptsFolder;
67-
6864
private boolean flexibleGTFParsing;
6965
private SpeciesConfiguration speciesConfiguration;
7066

@@ -75,15 +71,16 @@ public BuildCommandExecutor(AdminCliOptionsParser.BuildCommandOptions buildComma
7571
this.output = Paths.get(buildCommandOptions.outputDirectory);
7672
normalize = !buildCommandOptions.skipNormalize;
7773

78-
this.ensemblScriptsFolder = new File(System.getProperty("basedir") + "/bin/ensembl-scripts/");
7974
this.flexibleGTFParsing = buildCommandOptions.flexibleGTFParsing;
8075
}
8176

82-
8377
/**
8478
* Parse specific 'build' command options.
79+
*
80+
* @throws CellBaseException if an error other than a command line parameter parsing error occurs while executing the build
8581
*/
86-
public void execute() {
82+
public void execute() throws CellBaseException {
83+
String buildOption = null;
8784
try {
8885
// Output directory needs to be created if it doesn't exist
8986
if (!Files.exists(output)) {
@@ -104,7 +101,7 @@ public void execute() {
104101
assembly = SpeciesUtils.getDefaultAssembly(speciesConfiguration);
105102
}
106103

107-
ensemblVersion = assembly.getEnsemblVersion();
104+
String ensemblVersion = assembly.getEnsemblVersion();
108105
ensemblRelease = "release-" + ensemblVersion.split("_")[0];
109106

110107
String spShortName = getSpeciesShortname(speciesConfiguration);
@@ -130,9 +127,8 @@ public void execute() {
130127
}
131128

132129
for (int i = 0; i < buildOptions.length; i++) {
133-
String buildOption = buildOptions[i];
130+
buildOption = buildOptions[i];
134131

135-
logger.info("Building '{}' data", buildOption);
136132
CellBaseBuilder parser = null;
137133
switch (buildOption) {
138134
case EtlCommons.GENOME_DATA:
@@ -156,9 +152,6 @@ public void execute() {
156152
case EtlCommons.PROTEIN_DATA:
157153
parser = buildProtein();
158154
break;
159-
// case EtlCommons.PPI_DATA:
160-
// parser = getInteractionParser();
161-
// break;
162155
case EtlCommons.CONSERVATION_DATA:
163156
parser = buildConservation();
164157
break;
@@ -181,24 +174,26 @@ public void execute() {
181174
parser = buildPharmacogenomics();
182175
break;
183176
default:
184-
logger.error("Build option '" + buildCommandOptions.data + "' is not valid");
177+
logger.error("Build option '{}' is not valid", buildCommandOptions.data);
185178
break;
186179
}
187180

188181
if (parser != null) {
189-
try {
190-
parser.parse();
191-
} catch (Exception e) {
192-
logger.error("Error executing 'build' command " + buildCommandOptions.data + ": " + e.getMessage(), e);
193-
}
182+
logger.info("Building '{}' data ...", buildOption);
183+
parser.parse();
184+
logger.info("Building '{}' data. Done.", buildOption);
194185
parser.disconnect();
195186
}
196187
}
197188
}
198189
} catch (ParameterException e) {
199190
logger.error("Error parsing build command line parameters: " + e.getMessage(), e);
200-
} catch (IOException | CellBaseException e) {
201-
logger.error(e.getMessage());
191+
} catch (Exception e) {
192+
String msg = "Error executing the command 'build'.";
193+
if (StringUtils.isNotEmpty(buildOption)) {
194+
msg += " It was building the data '" + buildOption + "'";
195+
}
196+
throw new CellBaseException(msg, e);
202197
}
203198
}
204199

@@ -207,7 +202,6 @@ private CellBaseBuilder buildRepeats() {
207202
copyVersionFiles(Arrays.asList(repeatsFilesDir.resolve(EtlCommons.TRF_VERSION_FILENAME)));
208203
copyVersionFiles(Arrays.asList(repeatsFilesDir.resolve(EtlCommons.GSD_VERSION_FILENAME)));
209204
copyVersionFiles(Arrays.asList(repeatsFilesDir.resolve(EtlCommons.WM_VERSION_FILENAME)));
210-
// TODO: chunk size is not really used in ConvervedRegionParser, remove?
211205
CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, EtlCommons.REPEATS_JSON);
212206
return new RepeatsBuilder(repeatsFilesDir, serializer);
213207
}
@@ -223,44 +217,11 @@ private void copyVersionFiles(List<Path> pathList) {
223217
try {
224218
Files.copy(path, downloadFolder.resolve(path.getFileName()), StandardCopyOption.REPLACE_EXISTING);
225219
} catch (IOException e) {
226-
logger.warn("Version file {} not found - skipping", path.toString());
220+
logger.warn("Version file {} not found - skipping", path);
227221
}
228222
}
229223
}
230224

231-
// private void buildGenomeInfo() {
232-
// /**
233-
// * To get some extra info about the genome such as chromosome length or cytobands
234-
// * we execute the following script.
235-
// */
236-
// try {
237-
// String outputFileName = downloadFolder.resolve("genome_info.json").toAbsolutePath().toString();
238-
// List<String> args = new ArrayList<>();
239-
// args.addAll(Arrays.asList("--species", speciesConfigurathtion.getScientificName(),
240-
// "--assembly", buildCommandOptions.assembly == null ? getDefaultHumanAssembly() : buildCommandOptions.assembly,
241-
// "-o", outputFileName,
242-
// "--ensembl-libs", configuration.getDownload().getEnsembl().getLibs()));
243-
// if (!configuration.getSpecies().getVertebrates().contains(speciesConfiguration)
244-
// && !speciesConfiguration.getScientificName().equals("Drosophila melanogaster")) {
245-
// args.add("--phylo");
246-
// args.add("no-vertebrate");
247-
// }
248-
//
249-
// String geneInfoLogFileName = downloadFolder.resolve("genome_info.log").toAbsolutePath().toString();
250-
//
251-
// boolean downloadedGenomeInfo;
252-
// downloadedGenomeInfo = EtlCommons.runCommandLineProcess(ensemblScriptsFolder, "./genome_info.pl", args, geneInfoLogFileName);
253-
//
254-
// if (downloadedGenomeInfo) {
255-
// logger.info(outputFileName + " created OK");
256-
// } else {
257-
// logger.error("Genome info for " + speciesConfiguration.getScientificName() + " cannot be downloaded");
258-
// }
259-
// } catch (IOException | InterruptedException e) {
260-
// e.printStackTrace();
261-
// }
262-
// }
263-
264225
private CellBaseBuilder buildGenomeSequence() throws CellBaseException {
265226
// Sanity check
266227
Path genomeVersionPath = downloadFolder.resolve(GENOME_SUBDIRECTORY).resolve(GENOME_VERSION_FILENAME);
@@ -316,50 +277,19 @@ private CellBaseBuilder buildRegulation() {
316277
}
317278

318279
private CellBaseBuilder buildProtein() {
319-
Path proteinFolder = downloadFolder.resolve("protein");
280+
Path proteinFolder = downloadFolder.resolve(PROTEIN_SUBDIRECTORY);
320281
copyVersionFiles(Arrays.asList(proteinFolder.resolve("uniprotVersion.json"),
321282
proteinFolder.resolve("interproVersion.json")));
322-
CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "protein");
323-
return new ProteinBuilder(proteinFolder.resolve("uniprot_chunks"),
324-
downloadFolder.resolve("protein").resolve("protein2ipr.dat.gz"), speciesConfiguration.getScientificName(), serializer);
325-
}
326-
327-
private void getProteinFunctionPredictionMatrices(SpeciesConfiguration sp, Path geneFolder)
328-
throws IOException, InterruptedException {
329-
logger.info("Downloading protein function prediction matrices ...");
330-
331-
// run protein_function_prediction_matrices.pl
332-
String proteinFunctionProcessLogFile = geneFolder.resolve("protein_function_prediction_matrices.log").toString();
333-
List<String> args = Arrays.asList("--species", sp.getScientificName(), "--outdir", geneFolder.toString(),
334-
"--ensembl-libs", configuration.getDownload().getEnsembl().getLibs());
335-
336-
boolean proteinFunctionPredictionMatricesObtaines = EtlCommons.runCommandLineProcess(ensemblScriptsFolder,
337-
"./protein_function_prediction_matrices.pl",
338-
args,
339-
proteinFunctionProcessLogFile);
340-
341-
// check output
342-
if (proteinFunctionPredictionMatricesObtaines) {
343-
logger.info("Protein function prediction matrices created OK");
344-
} else {
345-
logger.error("Protein function prediction matrices for " + sp.getScientificName() + " cannot be downloaded");
346-
}
347-
}
348-
349-
private CellBaseBuilder getInteractionParser() {
350-
Path proteinFolder = downloadFolder.resolve("protein");
351-
Path psimiTabFile = proteinFolder.resolve("intact.txt");
352-
copyVersionFiles(Arrays.asList(proteinFolder.resolve("intactVersion.json")));
353-
CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "protein_protein_interaction");
354-
return new InteractionBuilder(psimiTabFile, speciesConfiguration.getScientificName(), serializer);
283+
CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, PROTEIN_DATA);
284+
return new ProteinBuilder(proteinFolder.resolve("uniprot_chunks"), downloadFolder.resolve(PROTEIN_SUBDIRECTORY)
285+
.resolve("protein2ipr.dat.gz"), speciesConfiguration.getScientificName(), serializer);
355286
}
356287

357288
private CellBaseBuilder buildConservation() {
358289
Path conservationFilesDir = downloadFolder.resolve("conservation");
359290
copyVersionFiles(Arrays.asList(conservationFilesDir.resolve("gerpVersion.json"),
360291
conservationFilesDir.resolve("phastConsVersion.json"),
361292
conservationFilesDir.resolve("phyloPVersion.json")));
362-
// TODO: chunk size is not really used in ConvervedRegionParser, remove?
363293
int conservationChunkSize = MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE;
364294
CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder);
365295
return new ConservationBuilder(conservationFilesDir, conservationChunkSize, serializer);
@@ -406,10 +336,14 @@ private Path getFastaReferenceGenome() throws CellBaseException {
406336
Path fastaPath = downloadFolder.resolve(GENOME_SUBDIRECTORY).resolve(fastaFilename);
407337
if (fastaPath.toFile().exists()) {
408338
// Gunzip
409-
logger.info("Gunzip file: " + fastaPath);
339+
logger.info("Gunzip file: {}", fastaPath);
410340
try {
411341
EtlCommons.runCommandLineProcess(null, "gunzip", Collections.singletonList(fastaPath.toString()), null);
412-
} catch (IOException | InterruptedException e) {
342+
} catch (IOException e) {
343+
throw new CellBaseException("Error executing gunzip in FASTA file " + fastaPath, e);
344+
} catch (InterruptedException e) {
345+
// Restore interrupted state...
346+
Thread.currentThread().interrupt();
413347
throw new CellBaseException("Error executing gunzip in FASTA file " + fastaPath, e);
414348
}
415349
}

0 commit comments

Comments
 (0)