
Merge pull request #3 from lifeomic/javadoc
Javadoc
dmmiller612 authored Nov 8, 2017
2 parents 29dca63 + 9e0730f commit e0555f9
Showing 5 changed files with 138 additions and 9 deletions.
14 changes: 11 additions & 3 deletions README.md
@@ -4,13 +4,22 @@ Spark VCF data source implementation in native spark without Hadoop-bam.
# Introduction

Spark VCF allows you to natively load VCFs into an Apache Spark DataFrame/Dataset. To get started with Spark-VCF, you can
-clone or download this repository, then run `mvn package` and use the jar. In the very near future, spark-vcf will
-be added to Maven Central.
+clone or download this repository, then run `mvn package` and use the jar. We are also now in Maven Central.

Since spark-vcf is written specifically for Spark, it comes with large performance gains over frameworks like ADAM.

# Getting Started

To install spark-vcf, add the following to your pom:

```
<dependency>
    <groupId>com.lifeomic</groupId>
    <artifactId>spark-vcf</artifactId>
    <version>0.1.0</version>
</dependency>
```
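
If you build with sbt instead of Maven, the equivalent coordinate should be the following (an assumption: the bare `spark-vcf` artifactId above suggests the jar is published without a Scala-version suffix, hence `%` rather than `%%`):

```scala
libraryDependencies += "com.lifeomic" % "spark-vcf" % "0.1.0"
```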

Getting started with Spark VCF is as simple as:

```scala
...
```

@@ -64,7 +73,6 @@

```scala
val sparkConf = new SparkConf()
```
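
Both snippets above are collapsed in this diff view. As a minimal, hedged sketch of the intended usage — assuming the data source is addressed by the package containing `DefaultSource` (`com.lifeomic.variants`, see below) and that the path is illustrative:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("spark-vcf-example")
  .getOrCreate()

// Load a VCF into a DataFrame via the spark-vcf data source.
val vcf = spark.read
  .format("com.lifeomic.variants") // assumed from the DefaultSource package
  .load("path/to/example.vcf")     // hypothetical path

vcf.printSchema()
```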

# TODO
-* put release in Maven Central
* Provide performance benchmarks compared to ADAM
* Get Travis CI set up

89 changes: 86 additions & 3 deletions pom.xml
@@ -4,9 +4,9 @@
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

    <modelVersion>4.0.0</modelVersion>
-    <groupId>spark-vcf</groupId>
+    <groupId>com.lifeomic</groupId>
    <artifactId>spark-vcf</artifactId>
-    <version>0.1.0-SNAPSHOT</version>
+    <version>0.2.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>Spark VCF</name>
@@ -28,6 +28,15 @@
        <tag>master</tag>
    </scm>

    <developers>
        <developer>
            <name>Derek Miller</name>
            <email>derek.miller@lifeomic.com</email>
            <organization>Lifeomic</organization>
            <organizationUrl>https://lifeomic.com</organizationUrl>
        </developer>
    </developers>

    <properties>
        <java.version>1.8</java.version>
        <scala.version>2.11.8</scala.version>
@@ -37,6 +46,17 @@
        <jackson.version>2.6.5</jackson.version>
    </properties>

    <distributionManagement>
        <snapshotRepository>
            <id>ossrh</id>
            <url>https://oss.sonatype.org/content/repositories/snapshots</url>
        </snapshotRepository>
        <repository>
            <id>ossrh</id>
            <url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
        </repository>
    </distributionManagement>

    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
@@ -99,11 +119,12 @@
            <version>7.9.0</version>
        </dependency>


    </dependencies>

    <build>

        <finalName>spark-vcf</finalName>

        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
@@ -115,6 +136,34 @@
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-source-plugin</artifactId>
                <version>2.2.1</version>
                <executions>
                    <execution>
                        <id>attach-sources</id>
                        <goals>
                            <goal>jar-no-fork</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-javadoc-plugin</artifactId>
                <version>3.0.0-M1</version>
                <executions>
                    <execution>
                        <id>attach-javadocs</id>
                        <goals>
                            <goal>jar</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>build-helper-maven-plugin</artifactId>
@@ -158,12 +207,46 @@
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>attach-scaladocs</id>
                        <phase>package</phase>
                        <goals>
                            <goal>doc-jar</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <scalaVersion>${scala.version}</scalaVersion>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.sonatype.plugins</groupId>
                <artifactId>nexus-staging-maven-plugin</artifactId>
                <version>1.6.7</version>
                <extensions>true</extensions>
                <configuration>
                    <serverId>ossrh</serverId>
                    <nexusUrl>https://oss.sonatype.org/</nexusUrl>
                    <autoReleaseAfterClose>true</autoReleaseAfterClose>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-gpg-plugin</artifactId>
                <version>1.5</version>
                <executions>
                    <execution>
                        <id>sign-artifacts</id>
                        <phase>verify</phase>
                        <goals>
                            <goal>sign</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

        </plugins>

    </build>
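
Taken together, the added plugins wire up an OSSRH release flow: source and javadoc/scaladoc jars are attached during `package`, artifacts are GPG-signed during `verify`, and the nexus-staging plugin closes and auto-releases the staging repository. Assuming a GPG key and an `ossrh` server entry in `settings.xml` (neither is shown in this diff), a release would then be cut with a plain `mvn clean deploy`.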
13 changes: 13 additions & 0 deletions src/main/scala/com/lifeomic/variants/DefaultSource.scala
@@ -30,10 +30,23 @@ import org.apache.spark.sql.types.StructType

class DefaultSource extends RelationProvider with SchemaRelationProvider {

    /**
     * Creates a relation
     * @param sqlContext Spark SQL context
     * @param parameters parameters for the job
     * @return base relation
     */
    override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = {
        createRelation(sqlContext, parameters, null)
    }

    /**
     * Creates a relation with a user-supplied schema
     * @param sqlContext Spark SQL context
     * @param parameters parameters for the job
     * @param schema user-defined schema
     * @return base relation
     */
    override def createRelation(sqlContext: SQLContext, parameters: Map[String, String], schema: StructType): BaseRelation = {
        createPrivateRelation(sqlContext, parameters)
    }
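
Given a `SparkSession` named `spark`, a hedged sketch of how these two overloads are reached (the format name is assumed from the package; the schema and path are hypothetical):

```scala
import org.apache.spark.sql.types._

// Hypothetical user schema; field names are illustrative only.
val customSchema = StructType(Seq(
  StructField("chrom", StringType),
  StructField("pos", IntegerType)
))

// Without .schema(...), Spark invokes the RelationProvider overload.
val df = spark.read
  .format("com.lifeomic.variants")
  .load("path/to/example.vcf")

// With .schema(...), Spark invokes the SchemaRelationProvider overload.
// Note that, as the diff shows, the supplied schema is not forwarded
// to createPrivateRelation.
val dfWithSchema = spark.read
  .format("com.lifeomic.variants")
  .schema(customSchema)
  .load("path/to/example.vcf")
```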
14 changes: 14 additions & 0 deletions src/main/scala/com/lifeomic/variants/VCFFunctions.scala
@@ -4,6 +4,11 @@ import com.lifeomic.variants.VCFConstants._

object VCFFunctions {

    /**
     * Returns a meta row of the key, value and number
     * @param t header prefix to strip, e.g. "##FORMAT=" or "##INFO="
     * @return function from a raw header line to (id, (type, number))
     */
    def metaHandler(t: String) : (String) => (String, (String, String)) = (item: String) => {
        val z = item.replace("<", "").replace(t, "")
        val filtered = z.split(",").filter(item => item.startsWith(ID) || item.startsWith(TYPE) || item.startsWith(NUMBER))
@@ -26,6 +31,15 @@
        (key, (value, number))
    }
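
To illustrate the return shape (a sketch — the middle of the function is collapsed above, so the exact parsing of ID/Type/Number is assumed from the javadoc and the tuple returned on the last line):

```scala
import com.lifeomic.variants.VCFFunctions

// "##INFO=" is the variant actually used in VCFResourceRelation below;
// the "##FORMAT=" prefix here is assumed to behave symmetrically.
val handler = VCFFunctions.metaHandler("##FORMAT=")

val line = "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">"

// Assumed result: the ID keyed to its Type and Number, i.e.
// ("GT", ("String", "1"))
val (id, (vcfType, number)) = handler(line)
```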

    /**
     * Extends fields for format and info columns
     * @param mapFlag whether to return the column as a map
     * @param map parameter map
     * @param schFields column fields as (name, type name, sql type) triples
     * @param start start index
     * @param end end index
     * @return
     */
    def fieldsExtended(mapFlag: Boolean,
                       map: Map[String, String],
                       schFields: Array[(String, String, String)],
17 changes: 14 additions & 3 deletions src/main/scala/com/lifeomic/variants/VCFResourceRelation.scala
@@ -32,7 +32,15 @@ import org.apache.spark.sql.types._
import org.apache.spark.sql.functions._
import com.lifeomic.variants.VCFConstants._


/**
 * Spark VCF resource relation
 * @param sqlContext Spark SQL context
 * @param path path of the VCF file(s)
 * @param useFormatTypes type checking for formats, plus type casting
 * @param useFormatAsMap use the format column as a map
 * @param useAnnotationTypes type casting for info fields
 * @param useAnnotationAsMap use annotations as a map
 */
class VCFResourceRelation(
    override val sqlContext: SQLContext,
    path: String,
@@ -75,8 +83,7 @@
    private val annotations = vcf.filter(col(TEXT_VALUE).startsWith("##INFO")).map(_.getString(1)).rdd.map(VCFFunctions.metaHandler("##INFO="))
    private var annotationCount = 1


-    /**
+    /*
     * order is
     * 0. chromosome
     * 1. position
@@ -93,6 +100,10 @@
     */
    override val schema: StructType = inferSchema()

    /**
     * Runs the VCF queries and converts them to an RDD of rows
     * @return RDD of Spark SQL Row objects
     */
    override def buildScan(): RDD[Row] = {
        val schFields = schema.fields.map(item => (item.name, item.dataType.typeName, item.dataType.sql.toLowerCase))
        val annotateCount = annotationCount
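
For a sense of what the `schFields` triples look like, here is a self-contained sketch (field names are hypothetical):

```scala
import org.apache.spark.sql.types._

val exampleSchema = StructType(Seq(
  StructField("chrom", StringType),
  StructField("pos", IntegerType)
))

// (name, typeName, sql-type) triples, as computed in buildScan:
val schFields = exampleSchema.fields.map(item =>
  (item.name, item.dataType.typeName, item.dataType.sql.toLowerCase))
// Array(("chrom", "string", "string"), ("pos", "integer", "int"))
```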
