elife01256.xml

<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.1d1 20130915//EN"  "JATS-archivearticle1.dtd"><article article-type="research-article" dtd-version="1.1d1" xmlns:xlink="http://www.w3.org/1999/xlink"><front><journal-meta><journal-id journal-id-type="nlm-ta">elife</journal-id><journal-id journal-id-type="hwp">eLife</journal-id><journal-id journal-id-type="publisher-id">eLife</journal-id><journal-title-group><journal-title>eLife</journal-title></journal-title-group><issn publication-format="electronic">2050-084X</issn><publisher><publisher-name>eLife Sciences Publications, Ltd</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">01256</article-id><article-id pub-id-type="doi">10.7554/eLife.01256</article-id><article-categories><subj-group subj-group-type="display-channel"><subject>Research article</subject></subj-group><subj-group subj-group-type="heading"><subject>Developmental biology and stem cells</subject></subj-group><subj-group subj-group-type="heading"><subject>Genes and chromosomes</subject></subj-group></article-categories><title-group><article-title>Chromatin signature of widespread monoallelic expression</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes" id="author-6724"><name><surname>Nag</surname><given-names>Anwesha</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="fn" rid="equal-contrib">†</xref><xref ref-type="fn" rid="con1"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/><xref ref-type="other" rid="dataro2"/><xref ref-type="other" rid="dataro3"/></contrib><contrib contrib-type="author" equal-contrib="yes" id="author-6725"><name><surname>Savova</surname><given-names>Virginia</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="fn" 
rid="equal-contrib">†</xref><xref ref-type="fn" rid="con2"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/><xref ref-type="other" rid="dataro2"/><xref ref-type="other" rid="dataro3"/></contrib><contrib contrib-type="author" id="author-6726"><name><surname>Fung</surname><given-names>Ho-Lim</given-names></name><xref ref-type="aff" rid="aff3"/><xref ref-type="fn" rid="con3"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/><xref ref-type="other" rid="dataro3"/></contrib><contrib contrib-type="author" id="author-6727"><name><surname>Miron</surname><given-names>Alexander</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="con4"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/></contrib><contrib contrib-type="author" id="author-6728"><name><surname>Yuan</surname><given-names>Guo-Cheng</given-names></name><xref ref-type="aff" rid="aff4"/><xref ref-type="fn" rid="con5"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/></contrib><contrib contrib-type="author" id="author-6729"><name><surname>Zhang</surname><given-names>Kun</given-names></name><xref ref-type="aff" rid="aff3"/><xref ref-type="other" rid="par-3"/><xref ref-type="fn" rid="con6"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/><xref ref-type="other" rid="dataro3"/></contrib><contrib contrib-type="author" corresp="yes" id="author-6730"><name><surname>Gimelbrant</surname><given-names>Alexander A</given-names></name><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/><xref ref-type="corresp" rid="cor1">*</xref><xref ref-type="other" rid="par-1"/><xref ref-type="other" rid="par-2"/><xref ref-type="other" rid="par-4"/><xref ref-type="fn" rid="con7"/><xref ref-type="fn" rid="conf1"/><xref ref-type="other" rid="dataro1"/><xref ref-type="other" rid="dataro2"/><xref ref-type="other" rid="dataro3"/></contrib><aff id="aff1"><institution content-type="dept">Department of 
Cancer Biology and Center for Cancer Systems Biology</institution>, <institution>Dana-Farber Cancer Institute</institution>, <addr-line><named-content content-type="city">Boston</named-content></addr-line>, <country>United States</country></aff><aff id="aff2"><institution content-type="dept">Department of Genetics</institution>, <institution>Harvard Medical School</institution>, <addr-line><named-content content-type="city">Boston</named-content></addr-line>, <country>United States</country></aff><aff id="aff3"><institution content-type="dept">Department of Bioengineering</institution>, <institution>University of California, San Diego</institution>, <addr-line><named-content content-type="city">La Jolla</named-content></addr-line>, <country>United States</country></aff><aff id="aff4"><institution content-type="dept">Department of Biostatistics and Computational Biology</institution>, <institution>Dana-Farber Cancer Institute</institution>, <addr-line><named-content content-type="city">Boston</named-content></addr-line>, <country>United States</country></aff></contrib-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Gingeras</surname><given-names>Thomas</given-names></name><role>Reviewing editor</role><aff><institution>Cold Spring Harbor Laboratory</institution>, <country>United States</country></aff></contrib></contrib-group><author-notes><corresp id="cor1"><label>*</label>For correspondence: <email>alexander_gimelbrant@dfci.harvard.edu</email></corresp><fn fn-type="con" id="equal-contrib"><label>†</label><p>These authors contributed equally to this work</p></fn></author-notes><pub-date date-type="pub" publication-format="electronic"><day>31</day><month>12</month><year>2013</year></pub-date><pub-date pub-type="collection"><year>2013</year></pub-date><volume>2</volume><elocation-id>e01256</elocation-id><history><date date-type="received"><day>19</day><month>07</month><year>2013</year></date><date 
date-type="accepted"><day>21</day><month>11</month><year>2013</year></date></history><permissions><copyright-statement>© 2013, Nag et al</copyright-statement><copyright-year>2013</copyright-year><copyright-holder>Nag et al</copyright-holder><license xlink:href="http://creativecommons.org/licenses/by/3.0/"><license-p>This article is distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/3.0/">Creative Commons Attribution License</ext-link>, which permits unrestricted use and redistribution provided that the original author and source are credited.</license-p></license></permissions><self-uri content-type="pdf" xlink:href="elife01256.pdf"/><abstract><object-id pub-id-type="doi">10.7554/eLife.01256.001</object-id><p>In mammals, numerous autosomal genes are subject to mitotically stable monoallelic expression (MAE), including genes that play critical roles in a variety of human diseases. Due to challenges posed by the clonal nature of MAE, very little is known about its regulation; in particular, no molecular features have been specifically linked to MAE. In this study, we report an approach that distinguishes MAE genes in human cells with great accuracy: a chromatin signature consisting of chromatin marks associated with active transcription (H3K36me3) and silencing (H3K27me3) simultaneously occurring in the gene body. The MAE signature is present in ∼20% of ubiquitously expressed genes and over 30% of tissue-specific genes across cell types. Notably, it is enriched among key developmental genes that have bivalent chromatin structure in pluripotent cells. 
Our results open a new approach to the study of MAE that is independent of polymorphisms, and suggest that MAE is linked to cell differentiation.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01256.001">http://dx.doi.org/10.7554/eLife.01256.001</ext-link></p></abstract><abstract abstract-type="executive-summary"><object-id pub-id-type="doi">10.7554/eLife.01256.002</object-id><title>eLife digest</title><p>Understanding how genes are activated and silenced is one of the central challenges in modern biology. These processes underpin the development of a fertilized egg into a complex organism, and they can also lead to life-threatening diseases when they go wrong. There are two copies of each gene in a human cell, a maternal copy and a paternal copy, and it is thought that both copies are usually regulated together. However, there are exceptions to this rule: for certain genes only the maternal copy is expressed as a protein in some cells, whereas the paternal copy is expressed in other cells.</p><p>This form of gene regulation, which is called monoallelic expression, can result in neighboring cells heading down very different paths. In extreme cases, depending on the differences between the two copies of the gene, cells that express one copy may function normally, while cells where the other copy is activated will start forming tumors. However, despite these potentially grave consequences, and early results which suggested that monoallelic expression affected a large number of human and mouse genes, it has proved to be a major technical challenge to identify these genes in most cell types.</p><p>Now, Nag, Savova et al. have discovered a molecular signature that can be used to detect monoallelic expression. The signature was found in chromatin, the densely packed structure formed by DNA and proteins inside the cell nucleus. Nag, Savova et al. 
discovered that the genes that are subject to monoallelic expression are bound with proteins that are modified in two contrasting ways. One modification, which is usually a sign of gene silencing, is prevalent on the inactive copy of the gene, and the other, which often marks active genes, is chiefly present on the active copy.</p><p>Nag, Savova et al. report that these modifications are found in different sets of genes in different cell types, indicating distinct genome-wide patterns of monoallelic expression. The chromatin signature approach lets them estimate the fraction of human genes that are subject to monoallelic expression. This number is surprisingly high: about 20% of commonly expressed genes and more than one-third of tissue-specific genes. In a particularly intriguing finding, almost all bivalent genes—a subset of genes that are involved in determining the fate of cell during development—are estimated to become monoallelic when they are activated.</p><p>In addition to these unexpected findings, the chromatin signature approach opens the door to exploring monoallelic expression as a form of gene regulation in all types of cells and, ultimately, to understanding how it is involved in both normal development and in disease.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01256.002">http://dx.doi.org/10.7554/eLife.01256.002</ext-link></p></abstract><kwd-group kwd-group-type="author-keywords"><title>Author keywords</title><kwd>monoallelic expression</kwd><kwd>chromatin</kwd><kwd>epigenome</kwd><kwd>bivalent</kwd><kwd>AST-Seq</kwd></kwd-group><kwd-group kwd-group-type="research-organism"><title>Research organism</title><kwd>Human</kwd></kwd-group><funding-group><award-group id="par-1"><funding-source><institution-wrap><institution>Claudia Adams Barr Foundation</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Gimelbrant</surname><given-names>Alexander 
A</given-names></name></principal-award-recipient></award-group><award-group id="par-2"><funding-source><institution-wrap><institution>Susan F Smith Center for Women’s Cancers</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Gimelbrant</surname><given-names>Alexander A</given-names></name></principal-award-recipient></award-group><award-group id="par-3"><funding-source><institution-wrap><institution>National Institutes of Health</institution></institution-wrap></funding-source><award-id>R01GM097253</award-id><principal-award-recipient><name><surname>Zhang</surname><given-names>Kun</given-names></name></principal-award-recipient></award-group><award-group id="par-4"><funding-source><institution-wrap><institution>Pew Scholars Program</institution></institution-wrap></funding-source><principal-award-recipient><name><surname>Gimelbrant</surname><given-names>Alexander A</given-names></name></principal-award-recipient></award-group><funding-statement>The funders had no role in study design, data collection and interpretation, or the decision to submit the work for publication.</funding-statement></funding-group><custom-meta-group><custom-meta><meta-name>elife-xml-version</meta-name><meta-value>2</meta-value></custom-meta><custom-meta specific-use="meta-only"><meta-name>Author impact statement</meta-name><meta-value>Active and repressive chromatin marks, asymmetrically distributed between alleles, distinguish gene bodies subject to epigenetically controlled monoallelic expression on autosomes in human cells.</meta-value></custom-meta></custom-meta-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>A variety of genetic and epigenetic factors affect the relative expression levels of the two copies of each given gene in diploid cells. 
In addition to cis- and trans-regulatory variation (<xref ref-type="bibr" rid="bib16">Gilad et al., 2008</xref>), there are at least three major kinds of non-Mendelian phenomena that control allele-specific expression in mammals. One is the X chromosome inactivation (<xref ref-type="bibr" rid="bib34">Lyon, 1961</xref>): in female embryos, around the time of implantation, about half of the cells choose to inactivate the maternal X, and the rest inactivate the paternal X, affecting most of the X-linked genes (<xref ref-type="bibr" rid="bib8">Carrel and Willard, 2005</xref>; <xref ref-type="bibr" rid="bib2">Berletch et al., 2010</xref>; <xref ref-type="bibr" rid="bib51">Yang et al., 2010</xref>). Another is imprinting: genes such as <italic>IGF2</italic> and <italic>H19</italic> are expressed from only one allele, either paternal or maternal (<xref ref-type="bibr" rid="bib20">Glaser et al., 2006</xref>).</p><p>Finally, a significant fraction of mammalian autosomal genes are subject to monoallelic expression (MAE), which reflects a mitotically stable allele-specific expression with different allelic states in clonal lineages. MAE is observed in olfactory receptor genes (<xref ref-type="bibr" rid="bib9">Chess et al., 1994</xref>), as well as genes coding for immunoglobulins and some cytokines (<xref ref-type="bibr" rid="bib44">Pernis et al., 1965</xref>; <xref ref-type="bibr" rid="bib6">Bix and Locksley, 1998</xref>; <xref ref-type="bibr" rid="bib22">Holländer et al., 1998</xref>). 
Using genome-wide analyses of allele-specific expression, we and others have added a surprisingly large number of the autosomal genes in human and mouse to the MAE class (<xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>; <xref ref-type="bibr" rid="bib23">Jeffries et al., 2012</xref>; <xref ref-type="bibr" rid="bib54">Zwemer et al., 2012</xref>; <xref ref-type="bibr" rid="bib31">Li et al., 2012b</xref>), including genes implicated in a number of human diseases, such as Alzheimer’s disease (<italic>APP</italic>) (<xref ref-type="bibr" rid="bib5">Bertram and Tanzi, 2012</xref>) and cancer (<italic>DAPK1</italic>) (<xref ref-type="bibr" rid="bib45">Raval et al., 2007</xref>). MAE affects about 10% of ∼4000 tested genes in human lymphoblastoid cells (LCLs) and about 15% of more than 1300 assessed genes in analogous mouse cells (<xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>; <xref ref-type="bibr" rid="bib54">Zwemer et al., 2012</xref>).</p><p>Our growing appreciation of the prevalence of MAE only underscores how little we know about its biology. The only existing, large-scale sets of data are collected in clonal lymphocyte cell lines in vitro. The limited number of analyzed clones is insufficient to generate a complete catalog of MAE genes in that cell type, and little is known about the prevalence of MAE in other cell types. Virtually nothing is understood about the establishment of MAE during development and differentiation. Mechanistically, allelic choice has been linked to changes in chromatin states in some special cases: imprinting (<xref ref-type="bibr" rid="bib50">Wen et al., 2008</xref>), olfactory receptor gene choice (<xref ref-type="bibr" rid="bib35">Magklara et al., 2011</xref>), and immunoglobulin-kappa gene rearrangement (<xref ref-type="bibr" rid="bib14">Farago et al., 2012</xref>). 
In contrast, for hundreds of other autosomal MAE genes, no molecular features have been associated with establishment and maintenance of allelic choice. Similarly, there is no general understanding of MAE’s function.</p><p>A major technical bottleneck in addressing these questions is the clonal nature of MAE (<xref ref-type="fig" rid="fig1s1">Figure 1—figure supplement 1</xref>). Like X inactivation, MAE is masked in polyclonal samples, and obtaining monoclonal cell populations is challenging for most tissue types, particularly so in vivo. Moreover, genome-wide methods are limited by the availability of polymorphisms. In this study, we report a fundamentally new approach to the detection of monoallelic expression. In contrast to other methods, it does not require any allele-specific information, instead relying on a specific chromatin pattern as a proxy for MAE. We use this approach to address questions about MAE’s prevalence, development, and function.</p></sec><sec id="s2" sec-type="results"><title>Results</title><sec id="s2-1"><title>MAE genes have a characteristic chromatin signature</title><p>Histone modifications, in their diversity, present rich combinatorial possibilities for controlling gene transcription (<xref ref-type="bibr" rid="bib1">Barski et al., 2007</xref>; <xref ref-type="bibr" rid="bib13">Ernst and Kellis, 2010</xref>; <xref ref-type="bibr" rid="bib15">Filion et al., 2010</xref>). They therefore offer a constrained yet rich set of data for systematic analysis. 
To identify histone modifications that might be specific to the MAE genes, we compared chromatin marks associated with known MAE genes against those for known biallelically expressed (BAE) genes.</p><p>Previous observations in human and mouse cells suggested that a gene could show MAE in one cell type but not another (<xref ref-type="bibr" rid="bib19">Gimelbrant et al., 2005</xref>; <xref ref-type="bibr" rid="bib18">Gimelbrant and Chess, 2006</xref>; <xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>). We therefore hypothesized that if there were a correspondence between MAE state and chromatin modifications, it would be more pronounced when comparing data from the same cell type, while not necessarily from exactly the same cells. Since the largest available sets of known human MAE and BAE genes were identified in human lymphoblasts (<xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>) (see also Dataset S1 in Dryad, <xref ref-type="bibr" rid="bib41">Nag et al., 2013</xref>), we used these sets to compare with histone modification data for GM12878 lymphoblastoid cells deposited by the ENCODE project (<xref ref-type="bibr" rid="bib12">Dunham et al., 2012</xref>).</p><p>We focused on the eight marks that were investigated in a broad variety of cell types: H3K27me3 (histone H3 Lys-27 trimethylation), H3K36me3, H3K4me2, H4K20me3, H3K27ac (histone H3 Lys-27 acetylation), H3K4me1, H3K4me3, H3K9ac (<xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2A</xref>). 
To enable our analysis, we reduced complex patterns of histone modifications to two simple features capturing signal intensity in two distinct spatial domains: the proximal promoter signal (for any given modification: ChIP-Seq signal intensity integrated over the 2.5 kb region upstream of transcription start) and the signal integrated over the whole gene body (green and red areas in <xref ref-type="fig" rid="fig1">Figure 1A</xref>; see ‘Materials and methods’, <xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2</xref>). We then set out to analyze in a systematic way whether some combination of these measured signals can reliably distinguish known MAE genes and known biallelic genes.<fig-group><fig id="fig1" position="float"><object-id pub-id-type="doi">10.7554/eLife.01256.003</object-id><label>Figure 1.</label><caption><title>Genes with monoallelic expression have specific chromatin signature within the gene body.</title><p>(<bold>A</bold>) Assessment of histone modifications. The mapped ChIP-Seq signals for the listed modifications were derived from the total signal over the gene-body or promoter region: shown is the gene body signal for the two most informative chromatin marks H3K36me3 (<italic>green</italic>) and H3K27me3 (red). <italic>EBF1</italic> gene was shown to be MAE, <italic>ABCC1</italic> was shown to be biallelic in lymphoblastoid cells (<xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>). ChIP-Seq data in GM12878 lymphoblasts were generated by the ENCODE project. Graphics adapted from UCSC genome browser (<ext-link ext-link-type="uri" xlink:href="http://genome.ucsc.edu/">http://genome.ucsc.edu/</ext-link>; <xref ref-type="bibr" rid="bib38">Meyer et al., 2013</xref>). Height of the signal tracks was set 0–8. (<bold>B</bold>) High confidence MAE (blue) and biallelic (gold) autosomal genes in the training set are separated by the gene body signal for H3K27me3 and H3K36me3 in GM12878 cells. 
Light blue area illustrates partitioning of this space by the most optimal classifier (DT2F). Solid line demarcates external border of ‘Neutral’ setting; dotted line shows more restrictive ‘Precision’ setting and is a graphical representation of the boundary identified by an alternating decision tree (DTree), which was the best-performing machine learning method applied to the features after feature selection. Of 270 high confidence MAE genes, 268 had data for both H3K27me3 and H3K36me3. Of these, 204 (76%) are within predicted MAE region. (<bold>C</bold>) Distribution of all autosomal RefSeq genes in GM12878 cells according to gene body signal for H3K27me3 and H3K36me3. Genes are color-mapped according to their expression level in GM12878 cells, from lowly expressed in red to highly expressed in yellow. Silent transcripts (RPKM &lt;= 0.1) are shown in gray. Solid and dotted lines as in 1B. (<bold>D</bold>) Fraction of predicted MAE genes as a function of gene expression level. Left vertical axis: absolute number of predicted MAE (blue) and non-MAE genes (gold) per expression level bin. Right axis: fraction of predicted MAE genes (red circles) per same bin. Expression bins are 0.1 log10 units of RPKM in GM12878 cells. (<bold>E</bold>) Genome distribution of predicted MAE and biallelic genes and their expression level. Shown is chromosome 19; other autosomes are similar. Blue—genes predicted as MAE; gold—genes predicted as biallelic. Position along the chromosome corresponds to transcription start site of the gene; marker length reflects gene expression level in GM12878 cells. 
Only genes with RPKM &gt; 1 are shown.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01256.003">http://dx.doi.org/10.7554/eLife.01256.003</ext-link></p></caption><graphic xlink:href="elife01256f001"/></fig><fig id="fig1s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01256.004</object-id><label>Figure 1—figure supplement 1.</label><caption><title>Chromatin signature of monoallelic expression allows its detection in monoclonal and polyclonal samples.</title><p>Detection of MAE by expression bias is not possible in polyclonal cell populations as both paternal and maternal transcripts are present, making expression appear biallelic. H3K36me3 is indicated by green circles and H3K27me3 is indicated by red circles.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01256.004">http://dx.doi.org/10.7554/eLife.01256.004</ext-link></p></caption><graphic xlink:href="elife01256fs001"/></fig><fig id="fig1s2" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01256.005</object-id><label>Figure 1—figure supplement 2.</label><caption><title>Building and performance of chromatin feature classifiers.</title><p>(<bold>A</bold>) The mapped ChIP-Seq signals for the listed modifications were derived from the total signal over the gene-body (green) or 2.5 kb promoter region (red). <italic>EBF1</italic> gene was shown to be MAE in lymphoblastoid cells (<xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>). ChIP-Seq data in GM12878 lymphoblasts were generated by the ENCODE project. Graphics adapted from UCSC genome browser (<ext-link ext-link-type="uri" xlink:href="http://genome.ucsc.edu/">http://genome.ucsc.edu/</ext-link>; <xref ref-type="bibr" rid="bib38">Meyer et al., 2013</xref>). Height of the signal tracks was set 0–8. 
(<bold>B</bold>) Comparison of precision and recall of different classifier types when using distinct sets of chromatin features. False positive (FP) and false negative (FN) calls for training set of MAE and BAE genes are shown as function of the increasing cost of false positive errors. Classifiers shown: DT–Decision Tree; NB–Naïve Bayes. Feature sets: ‘7 features’–gene body signal for H3K27me3 and H3K36me3; and promoter signal for H3K27me3, H3K36me3, H3K4me2, H4K20me3, and H3K27ac; ‘2 features (also called DT2F)’—only gene body signals for H3K27me3 and H3K36me3. Neutral and Precision settings were chosen, respectively, for best recall, and for the optimal combination of recall and precision. (<bold>C</bold>) Comparison of the 2-feature (GeneBody) and 7-feature (GenePromoterAndBody) classifiers. Similarity of precision and recall values suggests that the two chromatin marks, H3K27me3 and H3K36me3, account for most of the difference between MAE and BAE genes.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01256.005">http://dx.doi.org/10.7554/eLife.01256.005</ext-link></p></caption><graphic xlink:href="elife01256fs002"/></fig><fig id="fig1s3" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01256.006</object-id><label>Figure 1—figure supplement 3.</label><caption><title>Distribution of various promoter and/or normalized gene body signal combinations in GM12878 cells in our training set.</title><p>High confidence MAE (blue) and biallelic (gold) autosomal genes in the training set do form clusters in some of these cases but fail to achieve as clear a separation of MAE and biallelic genes as does normalized H3K27me3 and H3K36me3 gene body signal combination (<xref ref-type="fig" rid="fig1">Figure 1B</xref>). 
Data are shown for a few representative cases.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01256.006">http://dx.doi.org/10.7554/eLife.01256.006</ext-link></p></caption><graphic xlink:href="elife01256fs003"/></fig></fig-group></p><p>Using all available features, we wanted to identify parameters that would offer the optimal tradeoff between low false positive rate and the largest possible number of correctly identified MAE genes. After systematic exploration detailed in <xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2B</xref>, we chose two relative penalty tradeoff settings: ‘Neutral’ (1:1) and ‘Precision’ (8:1). The best-performing classifier, Decision Tree (DT), identified at the more relaxed ‘Neutral’ stringency setting almost 80% of the known MAE genes with 20% false positive rate. At a more stringent, precision-optimized setting, the DT classifier identified only 10% false positives, maintaining ∼60% of true positives (<xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2B</xref>).</p><p>Strikingly, performance of the DT classifier with just two gene-body features, H3K27me3 (associated with gene silencing) and H3K36me3 (associated with gene transcription), was as good as the performance of any full-featured model (<xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2C</xref>). Other feature combinations also had some discriminatory power, but not as significant (<xref ref-type="fig" rid="fig1s3">Figure 1—figure supplement 3</xref>). This suggests that H3K27me3 and H3K36me3, taken together, account for most of the distinction between MAE and BAE genes. <xref ref-type="fig" rid="fig1">Figure 1B</xref> also illustrates the partition of the phase space defined by these two signals by the most optimal classifier (two-feature Decision Tree; DT2F; see ‘Materials and methods’) at both high and medium stringency settings (Precision and Neutral). 
Performance of the DT2F classifier was at least as good as the performance of any classifier, including any other tested feature combination (<xref ref-type="fig" rid="fig1s2">Figure 1—figure supplement 2</xref>).</p><p>We concluded that known autosomal MAE and BAE genes in this study show consistent, characteristic differences with respect to the gene body ChIP-seq signal for H3K27me3 and H3K36me3. Co-occurrence of these two signals in the same gene body is thus a specific chromatin signature of known MAE genes. Importantly, MAE identified in clones is reflected by the chromatin signature in a polyclonal sample.</p></sec><sec id="s2-2"><title>Chromatin signature is a reliable predictor of MAE</title><p>To broadly test this chromatin signature as a predictor of the MAE state, we identified novel genes with the MAE chromatin pattern, and assessed their allelic bias in monoclonal cell lines. Specifically, we calculated gene-body signal for H3K27me3 and H3K36me3 for all autosomal RefSeq genes based on the ENCODE ChIP-seq data for GM12878 lymphoblastoid cells (<xref ref-type="fig" rid="fig1">Figure 1C</xref>). In these cells, the classifier predicted as monoallelic 1315 (13%) of 10,322 autosomal genes with moderate or higher expression (RPKM &gt; 1), and 15 genes (3%) of 450 highly expressed genes (with RPKM &gt; 100) (<xref ref-type="fig" rid="fig1">Figure 1D</xref>; detailed in Dataset S1 in Dryad, <xref ref-type="bibr" rid="bib41">Nag et al., 2013</xref>). As a group, the predicted MAE genes were spread throughout all autosomes. They were interspersed with biallelic genes and showed a variety of expression levels (<xref ref-type="fig" rid="fig1">Figure 1E</xref>). This is consistent with MAE genes identified earlier (<xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>; <xref ref-type="bibr" rid="bib54">Zwemer et al., 2012</xref>).</p><p>To test for transcriptional allelic bias in predicted MAE genes, we used two complementary approaches. 
For a broad, genome-wide analysis, we used mRNA-seq in two independent (with different direction of X-inactivation) monoclonal cell lines, DF1 and DF2. We derived these lines from GM12878 line (see ‘Materials and methods’), because both its own genome and its parental genomes were fully sequenced by the 1000 Genomes Project (<xref ref-type="bibr" rid="bib10">Consortium, 2010</xref>). We generated the RNA-seq data from the DF1 and DF2 clones and analyzed them using a custom analysis pipeline (see ‘Materials and methods’). Of the 48 X-linked genes with SNPs covered by 10 or more reads, 43 (∼90%) showed positive evidence of clone-specific allelic bias; the rest were inconclusive; positive evidence for equivalence was not detected in any X-linked genes (Dataset S2 in Dryad, <xref ref-type="bibr" rid="bib41">Nag et al., 2013</xref>). By contrast, of the 3270 autosomal genes with comparable coverage, 1167 (35%) showed positive evidence of equivalent expression of both alleles and no evidence of bias in either clone; 269 (8%) had allelic bias, while the rest were inconclusive.</p><p>Altogether, there were 5001 autosomal genes with at least one SNP covered at any level (Dataset S2 in Dryad, <xref ref-type="bibr" rid="bib41">Nag et al., 2013</xref>). Of these, 1021 genes were predicted to be MAE by the DT2F classifier at the Neutral setting and 236 genes at the Precision-optimized setting. For a quantitative bias comparison, we used all expressed genes. For additional control, we also used equally sized sets of genes with matched levels of expression, randomly chosen from these remaining genes. Both control groups showed quite small mean allelic bias, about 60:40. By contrast, genes predicted MAE by the Neutral DT2F classifier showed about 75:25 bias, and the genes predicted by the classifier at Precision setting had mean bias of 90:10 (<xref ref-type="fig" rid="fig2">Figure 2B</xref>). 
To estimate probability of error, we sampled 10 sets of 40 predicted genes and 10 equally-sized control sets, matched by expression; the comparison of the mean bias in the 10 sampled sets showed highly significant difference (p&lt;9e−05; non-paired <italic>t</italic> test). Thus the predicted MAE genes had significantly higher bias than the control genes. In subsequent analysis, we used the neutral classifier setting in order to maximize the number of candidate MAE genes and scrutinize predictive properties of this less stringent setting.<fig-group><fig id="fig2" position="float"><object-id pub-id-type="doi">10.7554/eLife.01256.007</object-id><label>Figure 2.</label><caption><title>Prediction testing with RNA-Seq.</title><p>(<bold>A</bold>) Representative examples of allelic counts in data from two clones (DF1 and DF2) derived from GM12878 cells. Shown are total maternal (Mat; ‘pink’) or paternal (Pat; ‘blue’) counts for X-linked genes and autosomal monoallelic genes illustrating that the direction of allelic bias is clone-specific. (<bold>B</bold>) Mean allelic bias in different groups of genes in DF1 and DF2 clones as assessed by the RNA-Seq analysis. ‘50’ corresponds to perfect balance between alleles; ‘100’ to perfectly monoallelic expression. ‘Precision’ and ‘Neutral’—all informative expressed genes predicted as MAE using corresponding settings of the DT2F classifier; ‘All’—all informative expressed genes from GM12878 cells; ‘RPKM matched’—predicted biallelic genes, matched by the expression level to the predicted MAE genes (shown are the mean and standard deviation for 10 permuted sets of genes). (<bold>C</bold>) Definitions of allelic bias and lack of bias. Unbiased genes (gold) pass equivalence test (with 2:1 boundaries in either direction; equivalence area is light yellow); biased (blue) pass binomial test; genes that pass neither statistical test are called inconclusive (gray). 
See <xref ref-type="fig" rid="fig2s1">Figure 2—figure supplement 1</xref> for X-chromosome analysis results according to this scheme. (<bold>D</bold>) Fraction of genes showing allelic bias in DF1 and DF2 clones as assessed by RNA-seq. Biased genes were called in at least one clone based on FDR-corrected binomial test and displayed at least 2:1 bias. Unbiased genes were called based on passing the equivalence test in at least one clone and not passing the bias test in the other clone. (<bold>E</bold>) Allele-specific analysis of RNA-Seq data from DF1 and DF2 clones. Experimentally determined allelic states of autosomal genes. Predicted monoallelic and biallelic status is based on the neutral DT2F classifier. Assignments of genes as biased, unbiased or inconclusive (indeterminate in both clones). Color-coding as in panel <bold>C</bold>.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01256.007">http://dx.doi.org/10.7554/eLife.01256.007</ext-link></p></caption><graphic xlink:href="elife01256f002"/></fig><fig id="fig2s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01256.008</object-id><label>Figure 2—figure supplement 1.</label><caption><title>Allelic bias calling on X-Chromosome.</title><p>Application of the bias definition to X-linked genes in the DF1 and DF2 clones. Each rectangle corresponds to one gene; only informative genes are shown; genes are in the correct order on the chromosome but positions are not to scale. 
Blue—biased with paternal expression; magenta—biased with maternal expression (as defined in <xref ref-type="fig" rid="fig2">Figure 2C</xref>).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01256.008">http://dx.doi.org/10.7554/eLife.01256.008</ext-link></p></caption><graphic xlink:href="elife01256fs004"/></fig></fig-group></p><p>Next, we used the RNA-Seq data to categorize predicted and control genes as biased, unbiased, or indeterminate (<xref ref-type="fig" rid="fig2">Figure 2C</xref>). Biased expression was identified based on FDR-corrected binomial testing and allelic skewing of at least 2:1 (see ‘Materials and methods’). Importantly, rejection of the bias hypothesis by this test does not automatically mean the gene could be called unbiased. Therefore, we used equivalence testing (<xref ref-type="bibr" rid="bib32">Limentani et al., 2005</xref>), with equivalence boundaries corresponding to the two-fold imbalance; genes that failed both tests were called indeterminate. Genes predicted by the DT2F neutral classifier were enriched for genes with positively identified allelic bias; the precision classifier setting, as expected, yielded still better enrichment but fewer positively identified genes (<xref ref-type="fig" rid="fig2">Figure 2D,E</xref>).</p><p>This RNA-Seq approach confirms MAE predictions on a whole-transcriptome level, but it has significant limitations. Insufficient coverage depth leaves an overwhelming majority of genes as ‘indeterminate’ (<xref ref-type="fig" rid="fig2">Figure 2E</xref>). This results in underestimation of both the true positive and the true negative rates. Furthermore, a large majority of known MAE genes (about 85%) show biallelic expression in some clonal lineages (<xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>; <xref ref-type="bibr" rid="bib54">Zwemer et al., 2012</xref>). 
This is highly important when considering any validation experiments, since even exhaustive analysis of just two independent clones would miss monoallelic expression in many such genes that would happen to show biallelic expression in the two assessed clones. To validate MAE predictions more conclusively, we measured allelic bias in a greater number of independent clones. To simultaneously increase both coverage depth and the number of biological samples, we designed a targeted extra-deep RNA-Seq assay (allele-specific targeted sequencing; AST-Seq) that allowed us to precisely quantify allele-specific expression of a subset of genes in an increased number of clones (see <xref ref-type="fig" rid="fig3">Figure 3A</xref>).<fig id="fig3" position="float"><object-id pub-id-type="doi">10.7554/eLife.01256.009</object-id><label>Figure 3.</label><caption><title>Prediction testing with allele-specific targeted sequencing (AST-Seq).</title><p>(<bold>A</bold>) Schematic representation of the deep barcoding approach. As an illustration, analysis of three clones with no multiplexing is shown, each with a different allelic bias at a SNP of interest. Random-primed cDNA or genomic DNA are used as templates for PCR1, using gene-specific primers with universal tails. The next step associated universal amplicon tails in each sample with two barcodes (PCR2); this allows for barcoding a large number of samples with limited number of secondary primers. For a given sample, all amplicons share the same two barcodes. Barcoded amplicons from all samples are pooled, and sequencing adaptors attached. After sequencing and deconvolving by barcode, allelic hits are counted. (<bold>B</bold>) Representative allelic counts using the AST-seq. Allelic bias was assessed in two clonal lines, DF1 and DF2, derived from GM12878 and four clones, H2, H7, H14, and H16, from GM13130 (‘H0’) cells. Target SNPs were chosen to be informative in both cell lines. 
Genomic DNA (gray) was used as a control for allelic bias introduced in amplification; only unbiased assays were pursued. Shown are representative assays for X-linked genes (as control), and examples of genes predicted MAE or biallelic based on the chromatin signature in GM12878. Pink: expression bias towards reference (Ref) allele; blue: expression bias towards alternative (Alt) allele; gold: unbiased expression; no color: counts below threshold—data ignored. Note that, as expected, genes with clone-specific MAE could be biallelic in some clones. (<bold>C</bold>) Summary of the AST-seq analysis for all tested genes in six clonal samples. Biased (blue) and unbiased (gold) expression as defined in <xref ref-type="fig" rid="fig2">Figure 2C</xref>.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01256.009">http://dx.doi.org/10.7554/eLife.01256.009</ext-link></p></caption><graphic xlink:href="elife01256f003"/></fig></p><p>To assess both false negative and false positive rates for predictions by the DT2F classifier, we chose a set of predicted, unconfirmed MAE genes expressed in both assessed clones, and a comparable random set of predicted biallelic genes (see ‘Method note 2’). Previously, we had derived and characterized several independent clones from GM13130 lymphoblastoid cells (<xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>). Starting with four of these clones and the two clones from GM12878, we selected SNPs heterozygous in both genotypes. To control for possible genotyping errors and amplification bias, we used genomic DNA from the same cells. After removing SNPs that did not pass the equivalence test in the gDNA (cf. <xref ref-type="fig" rid="fig2">Figure 2C</xref>), we had SNPs in 17 predicted MAE genes and 28 predicted biallelic genes. 
As templates, we used DNA and DNase-treated nuclear RNA from original cell lines and the clones (<xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>); as a positive control for expression bias, we included X-linked genes (Dataset S3 in Dryad, <xref ref-type="bibr" rid="bib41">Nag et al., 2013</xref>).</p><p>We performed two sequencing experiments, with overlapping samples, obtaining 1.6 M reads on average per sample per run. <xref ref-type="fig" rid="fig3">Figure 3B</xref> shows a representative subset of all classes of assessed genes and associated allelic counts (complete data in Dataset S3 in Dryad, <xref ref-type="bibr" rid="bib41">Nag et al., 2013</xref>). While alleles were equally represented in genomic DNA, the X-linked genes (e.g., <italic>PIR</italic> and <italic>XIAP</italic>) showed strong clone-specific bias; genes predicted as biallelic (e.g., <italic>CEP110</italic> and <italic>HDLBP</italic>) showed no significant bias. Most autosomal genes predicted as monoallelic (e.g., <italic>FRMD6</italic> and <italic>IGF1R</italic>) showed the characteristic pattern of clone-specific monoallelic expression: strong bias in some clones, and biallelic expression or opposite bias in others.</p><p>The complete results of this experiment (see Dataset S3 in Dryad, <xref ref-type="bibr" rid="bib41">Nag et al., 2013</xref>) are summarized in <xref ref-type="fig" rid="fig3">Figure 3C</xref>. Of the 17 predicted autosomal MAE genes, 13 exhibited strong allelic bias in at least some of the assessed clones, with 12 of those showing different direction of bias between individual clones (false positive rate of about 24%). Note that the false positive rate should be treated as an overestimate, since two of the unconfirmed genes failed in the GM13130-derived clones, while two others failed altogether. 
In addition, the number of clones we assayed, while higher than in the first experiment, is not sufficient to establish with a high level of certainty that these are truly biallelic genes. Conversely, of the 28 predicted biallelic genes, none showed significant allelic bias in expression in any of the clones. We can thus conclude that the MAE chromatin signature is a specific and sensitive predictor of clone-specific MAE status.</p></sec><sec id="s2-3"><title>Asymmetric distribution of chromatin marks between the active and inactive alleles</title><p>We asked how the allelic distribution of the active and repressive chromatin marks in clonal cell lines relates to the transcriptional allelic bias. A multiplexed padlock probe approach (<xref ref-type="bibr" rid="bib52">Zhang et al., 2009</xref>) enabled us to assess allelic bias in heterozygous exonic SNPs in two clones with GM12878 genotype, and four clones from GM13130 cells. After removing assays that failed equivalence test in gDNA in all samples, we had 482 SNPs (see Dataset S4 in Dryad, <xref ref-type="bibr" rid="bib41">Nag et al., 2013</xref>). We used this approach to assess allelic bias in H3K27me3 and H3K36me3 ChIP samples simultaneously with cDNA from the same cells, as well as ChIP input and genomic DNA controls.</p><p><xref ref-type="fig" rid="fig4">Figure 4A</xref> shows representative examples of allelic counts for all classes of expression bias using a small number of SNPs in the two GM12878 clones (complete dataset in Dataset S4 in Dryad, <xref ref-type="bibr" rid="bib41">Nag et al., 2013</xref>). Allelic expression bias is evident in an imprinted gene <italic>SNRPN</italic>, on the X chromosome (<italic>SLC25A43</italic> and <italic>XIAP</italic>), and on autosomal MAE genes. 
Biased expression is accompanied by an asymmetric distribution of the two chromatin marks: H3K36me3 is associated with the higher transcribed allele, while H3K27me3 with the fully or partially silenced allele.<fig id="fig4" position="float"><object-id pub-id-type="doi">10.7554/eLife.01256.010</object-id><label>Figure 4.</label><caption><title>Correlation of allelic bias in expression with bias in chromatin marks.</title><p>(<bold>A</bold>) Representative examples of allelic counts in SNPs assessed with multiplexed targeted sequencing using padlocked probes. Apart from shown clones, additional clones from GM13130 individual were assessed (see ‘text’). Shown as control are an imprinted gene <italic>SNRPN</italic> and X-linked genes. Other genes were predicted MAE or biallelic based on their gene-body chromatin signature in GM12878 cells. Measurement for each SNP is shown as read counts for the reference and alternative (Ref/Alt) alleles as designated in dbSNP. Color-coding as in <xref ref-type="fig" rid="fig3">Figure 3B</xref>. Analysis summarized in this figure is based on 482 SNPs within 458 genes. (<bold>B</bold>) Correlation of allelic bias in H3K27me3 with allelic bias in cDNA. All informative SNPs were put into one of three bins according to their cDNA allelic bias: unbiased (<xref ref-type="fig" rid="fig2">Figure 2C</xref>); significantly biased towards reference allele; significantly biased towards alternative allele. For each of these groups, allelic bias for the ChIP sample from the same clone was assessed and analyzed using Kruskal–Wallis non-parametric ANOVA test. (<bold>C</bold>) Same as <bold>B</bold>, for H3K36me3. (<bold>D</bold>) Fraction of SNPs in predicted MAE and biallelic genes, showing allelic bias of 2:1. 
Bias calls were made by binomial testing in cDNA, H3K36me3 and H3K27me3 ChIP for SNPs, using data from the padlock probe experiments.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01256.010">http://dx.doi.org/10.7554/eLife.01256.010</ext-link></p></caption><graphic xlink:href="elife01256f004"/></fig></p><p>We used the complete dataset (Dataset S4 in Dryad, <xref ref-type="bibr" rid="bib41">Nag et al., 2013</xref>) to evaluate global relationships between expression bias and chromatin allelic bias. In order to pool data from two individuals, one of which (GM13130) lacked complete genotypes for parents, we assessed SNP bias as reference and alternative alleles (rather than maternal or paternal bias). SNPs in cDNA were assigned to one of three bins: reference allele bias; no bias; and alternative allele bias. For these groups, allelic bias in H3K27me3 (<xref ref-type="fig" rid="fig4">Figure 4B</xref>) and H3K36me3 (<xref ref-type="fig" rid="fig4">Figure 4C</xref>) was determined. In unbiased loci, both H3K27me3 and H3K36me3 were equally represented. In contrast, preferential expression of an allele was associated with elevated levels of H3K36me3 and decreased levels of H3K27me3 on that allele. Both effects were highly significant (p&lt;2 × 10e−9).</p><p>Genes predicted to have MAE were about fourfold over-represented among genes where SNPs showed significant bias (<xref ref-type="fig" rid="fig4">Figure 4D</xref>). SNPs with skewed H3K27me3 and H3K36me3 distribution were highly enriched in the genes predicted as MAE (p&lt;10e−6 and p=0.01, respectively; two-tailed Fisher’s exact test). 
This suggests that the asymmetric distribution of the histone modifications is to a large extent due to the genes that have the chromatin signature of monoallelic expression.</p></sec><sec id="s2-4"><title>Chromatin signature of MAE shows tissue-specific pattern</title><p>Using RNA–DNA FISH, we have previously shown (<xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>) that individual MAE genes identified in lymphoblasts also show monoallelic expression in vivo, in peripheral blood mononuclear cells (PBMCs). We used the trained DT2F classifier to analyze available ChIP-seq data from PBMCs (<xref ref-type="bibr" rid="bib4">Bernstein et al., 2010</xref>). In this and the following analyses we applied the classifier to all genes with evidence of transcription (excluding only genes that were not expressed; with RPKM&lt;0.1), to achieve the most comprehensive possible coverage (‘Method note 1’). The overall distribution of genes in the DT2F phase space was very similar in cells in vitro and in vivo, with two principal clusters at high H3K36me3 and high H3K27me3 (<xref ref-type="fig" rid="fig5">Figure 5A</xref>). Predicted genes in PBMC largely overlapped with our predictions for LCLs: out of 2057 genes showing the MAE chromatin signature in PBMC, and called in GM12878 cells, 83% (1712) were also predicted MAE there. This is particularly striking since the experimental data for these two related cell types were collected in two different laboratories. 
This suggests that the genes with MAE signature in lymphoblasts are quite similar to those in the related ex vivo cells.<fig-group><fig id="fig5" position="float"><object-id pub-id-type="doi">10.7554/eLife.01256.011</object-id><label>Figure 5.</label><caption><title>MAE chromatin signature in multiple cell types.</title><p>(<bold>A</bold>) Comparison of H3K27me3 and H3K36me3 ChIP-Seq gene body signal distribution for the autosomal genes in GM12878 cells (red) and in primary peripheral blood mononuclear cells (PBMC; blue). Silent genes (RPKM &lt; 0.1) are excluded in either case. Both datasets were collected by ENCODE; PBMC data: <ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE16368">GSE16368</ext-link>. Note that the dots are made more transparent than in <xref ref-type="fig" rid="fig1">Figure 1</xref> to make clear the overall shape of the distribution. (<bold>B</bold>) Overlap of predicted MAE genes in GM12878 and PBMC (silent genes are excluded). (<bold>C</bold>) Tissue-specific distribution of MAE genes. Overlap between predicted MAE genes in three cell types as labeled. Within dotted circle: genes expressed in all three lines (and MAE in at least one). Outside dotted circle: MAE genes showing cell type-specific expression (predicted MAE and expressed in that cell type, but silent in at least one of the two other cell lines). (<bold>D</bold>) Similarity of predicted MAE gene sets in seven cell types: GM12878—lymphoblasts, K562—acute myelocytic leukemia, H1ESC—human embryonic stem cells, HSMM—human skeletal muscle myocytes, HUVEC—human vascular epithelium, HMEC—human mammary epithelium, HCC1954—breast cancer. Similarity assessed according to the Jaccard similarity measure. In the heat map, darker gray corresponds to higher similarity. ChIP-Seq and RNA-Seq data sources: see Dataset S1 and S2 in Dryad (<xref ref-type="bibr" rid="bib41">Nag et al., 2013</xref>). 
(<bold>E</bold>) Cumulative number of predicted MAE genes as a function of the number of cell lines assessed. Counted are only genes expressed in all analyzed cell lines. Order of addition of cell lines was sampled by permutation, shown are mean values ± standard deviation. (<bold>F</bold>) Cumulative number of all genes and predicted MAE genes as a function of the number of cell lines assessed. Counted are all genes with any evidence of expression in at least one cell type. (<bold>G</bold>) Gene Ontology (GO) analysis of genes predicted MAE in indicated cell types. Most over- and under-represented categories for GM12878 cells are also shown for other cell types (in each cell line, predicted MAE genes are compared to all expressed genes). Horizontal axis: −log10(p), after Benjamini–Hochberg correction. Gray lines correspond to −log10(p) values as noted.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01256.011">http://dx.doi.org/10.7554/eLife.01256.011</ext-link></p></caption><graphic xlink:href="elife01256f005"/></fig><fig id="fig5s1" position="float" specific-use="child-fig"><object-id pub-id-type="doi">10.7554/eLife.01256.012</object-id><label>Figure 5—figure supplement 1.</label><caption><title>Overall distributions of the normalized H3K27me3 and H3K36me3 gene body signal in the analyzed cell types.</title><p>Comparison of H3K27me3 and H3K36me3 ChIP-Seq gene body signal distribution for the autosomal genes in a number of human cell lines (as noted). Each dot represents an autosomal gene. Silent genes (RPKM &lt; 0.1) are excluded. 
Light blue area illustrates partitioning of this space by the most optimal classifier (DT2F).</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01256.012">http://dx.doi.org/10.7554/eLife.01256.012</ext-link></p></caption><graphic xlink:href="elife01256fs005"/></fig></fig-group></p><p>We then asked if that similarity is in fact due to biological relatedness of LCLs and PBMCs; alternatively, it could be due to the signature being uniform in all cell types. We applied our analysis to all autosomal genes expressed in several cell types analyzed by ENCODE: vascular and mammary epithelial cells (HUVEC and HMEC), skeletal muscle (HSMM), embryonic stem cells (H1ESC), a leukemia line K562 and breast cancer line HCC1954, drawing on deposited data to analyze with our model: ChIP-Seq of H3K27me3, H3K36me3, Input control, and RNA-Seq data (<xref ref-type="fig" rid="fig5">Figure 5C–D</xref>). In the DT2F space, genes formed two major clusters in all assessed cell types (<xref ref-type="fig" rid="fig5s1">Figure 5—figure supplement 1</xref>). Although the precise positions of these clusters varied somewhat, the overall distribution remained consistent. Importantly, the DT2F classifier trained on the GM12878 data consistently covered the lower cluster of genes in different assessed cell types.</p><p>In contrast to the similarity between PBMC and GM12878 data collected by different labs, cells of different types showed pronounced differences in the set of genes classified by the DT2F as MAE, even though they were assessed in the same laboratory. This, along with the similarity in the general structure of the feature space (<xref ref-type="fig" rid="fig5s1">Figure 5—figure supplement 1</xref>) suggests that the biological properties of the cell type are captured by the model to a greater extent than other potential sources of variation and provides indirect support for the use of our method in other cell types. 
<xref ref-type="fig" rid="fig5">Figure 5C</xref> illustrates the prevalence of tissue-specific differences by comparing the predicted MAE genes in three cell lines. Genes predicted as MAE in only one of the three cell types are typically not expressed in all the three cell types. Strikingly, in each cell type between 1/3 and half of the predicted MAE genes showed cell type-specific expression. (In <xref ref-type="fig" rid="fig5">Figure 5C</xref>, compare 1184 predicted MAE genes expressed in HMEC but silent in one or both other cell lines, with 495 genes that are MAE only in HMEC but expressed also in HSMM and HUVEC cells, or with 395 genes that are expressed and predicted MAE in all three cell types.)</p><p>We interpret these observations to mean that the H3K36me3–H3K27me3 gene body signature is a general feature of monoallelic expression in multiple types of human cells, while a particular set of affected genes can be cell type-specific. It should be noted that the correspondence of the chromatin signature to monoallelic expression has only been systematically validated in clonal LCL lines (see <xref ref-type="fig" rid="fig2">Figure 2</xref> and <xref ref-type="fig" rid="fig3">Figure 3</xref>). Analysis in nonclonal cell populations (such as the ENCODE cell lines or isolated ex vivo cells) has been mostly restricted to single-cell techniques such as the fluorescent in situ hybridization approach which we have used in PBMCs (<xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>). Until a more general study is completed, it remains formally possible that the chromatin signature does not reflect MAE in some or many cell types. 
The analysis in the rest of this section should be understood as being performed on genes that have a particular chromatin signature, which at least in one cell type strongly correlates with monoallelic expression.</p><p>The greater the biological similarity between cell types, the greater the overlap in MAE genes (<xref ref-type="fig" rid="fig5">Figure 5D</xref>). Overall, cell lines fell into two groups: GM12878, K562 and H1ESC had a larger percentage of transcribed genes with MAE chromatin signature (about 25%), while the rest had a lower percentage (about 15%). A large number of predicted MAE genes in ES cells suggest that the allelic choice may occur fairly early in development.</p><p>When assessing overlap between predicted MAE genes in different cell types, several patterns became evident. Only 61 genes were predicted as MAE in all of the seven analyzed cell types. Among commonly expressed genes (∼10,000 genes with at least some evidence of expression in all analyzed cell lines), many are predicted as MAE only in some lines, implying tissue-specific MAE. Among ubiquitously expressed genes within this group, addition of more cell lines rapidly led to a plateau in the cumulative number of MAE genes, making up about 20% of the genes in this set (<xref ref-type="fig" rid="fig5">Figure 5E</xref>). In contrast, when all genes are considered (including cell type-specific ones), the number of genes expressed in at least one cell line and the cumulative number of predicted MAE genes keep rising, since a large fraction of the cell type-specific genes are predicted as MAE (<xref ref-type="fig" rid="fig5">Figure 5F</xref>). With seven cell lines, the total number of genes with evidence of expression in at least one cell line (RPKM&gt;0.1) was 18,248, among which the total number of genes with the chromatin signature of MAE in at least one cell line was 8716 (48%). 
Extrapolation of this trend indicates that, with a large number of cell types, about half of all genes would be predicted to have MAE in one or more cell types. Qualitatively similar results remain if a higher expression threshold is applied, such as RPKM&gt;1 (39% at seven cell lines).</p><p>Taken together, these analyses lead to two unexpected conclusions: (a) among widely expressed genes, about 20% have a chromatin pattern characteristic for MAE in at least one tissue; (b) taking genes with tissue-specific expression as a group, we estimate that between 39% and 48% have the MAE pattern.</p></sec><sec id="s2-5"><title>General properties of human MAE genes</title><p>The genes with MAE chromatin signature share some pronounced features as a group. Among predicted MAE genes, genes coding for cell surface proteins and those involved in multicellular developmental processes were heavily over-represented. Conversely, housekeeping genes and genes specific to intracellular organelles were highly under-represented. When the same gene ontology categories were assessed in other analyzed cell lines, they were similarly over- or under-represented at high significance levels (<xref ref-type="fig" rid="fig5">Figure 5G</xref>). Since the overlap in actual genes is relatively modest, this suggests that in different cell types MAE affects different genes involved in the same type of activity.</p><p>A particularly important group consists of bivalent genes, which are also associated with overlapping active and inactive histone modifications. In embryonic stem cells, bivalent genes are transcriptionally silent, and their promoters carry overlapping peaks of H3K4me3 and H3K27me3 (<xref ref-type="bibr" rid="bib39">Mikkelsen et al., 2007</xref>). These genes play a crucial role in determining cell fate during differentiation: upon reaching a point of lineage commitment, this ‘poised’ chromatin allows these genes to rapidly resolve into either an active or inactive state. 
To our surprise, we found that a remarkably large fraction of the known bivalent genes (&gt;80%) acquire the MAE chromatin signature in at least one of the differentiated cell types, much higher even when compared to other genes that are also silent in ES cells (<xref ref-type="fig" rid="fig6">Figure 6A</xref>). Our observations indicate that upon activation, these master regulator genes preferentially resolve into a state with the characteristic MAE chromatin signature.<fig id="fig6" position="float"><object-id pub-id-type="doi">10.7554/eLife.01256.013</object-id><label>Figure 6.</label><caption><title>MAE chromatin signature in bivalent genes.</title><p>(<bold>A</bold>) Overrepresentation of bivalent genes among predicted MAE genes. Predicted: predicted MAE in at least one cell line and not silent (RPKM &gt; 0.1); not predicted: not predicted MAE in any cell line. Groups of genes: (top) not bivalent, silent in hESC (RPKM &lt; 0.1); (middle) not bivalent, not silent in hESCs; (bottom) reported bivalent in human ES cells. (<bold>B</bold>) A speculative model of MAE establishment in bivalent genes. Genes with bivalent/poised chromatin in promoters are silent in undifferentiated stem cells; two alleles have symmetric distribution of active and inactive histone marks. When such a gene is activated upon reaching a point of cell fate determination, either one of the alleles (or both) can become transcriptionally active. The initial choice is stochastic, but it is stable in the clonal progeny. 
Asymmetric histone modifications in the gene body reflect activity of the alleles.</p><p><bold>DOI:</bold> <ext-link ext-link-type="doi" xlink:href="10.7554/eLife.01256.013">http://dx.doi.org/10.7554/eLife.01256.013</ext-link></p></caption><graphic xlink:href="elife01256f006"/></fig></p></sec></sec><sec id="s3" sec-type="discussion"><title>Discussion</title><p>Using a systematic machine learning approach, we identified and then experimentally validated a specific and sensitive signature for MAE: a gene-body overlap between chromatin marks for active transcription (H3K36me3) and gene silencing (H3K27me3). Interestingly, the promoter signal carried very little additional information relative to the gene-body features (<xref ref-type="fig" rid="fig1s3">Figure 1—figure supplement 3</xref>). This could be due to some combination of biological reasons with much lower noise associated with gene body features due to their much greater length. It should also be noted that we focused on promoters and gene bodies because they are unequivocally associated with particular genes. It might be informative to study the involvement of other regulatory elements, such as enhancers. Upon applying this approach to a variety of cell types, we estimate that up to 20% of ubiquitously expressed genes and more than one-third of tissue-specific genes showed the chromatin signature of MAE. Since detection of the MAE signature does not rely on SNPs or on sample clonality, we expect that this approach will make feasible the analysis of MAE in primary samples, including systematic comparisons of normal and diseased tissue.</p><p>Our findings fill three major gaps in our understanding of MAE. First, we have positively identified a molecular correlate of MAE with strong predictive power. Second, we mapped the MAE signature genome-wide in multiple cell types. 
Finally, we uncovered an unexpected relationship between MAE and known drivers of differentiation, bivalent genes, implying a functional role of MAE in this process.</p><p>The overlap of MAE and bivalent genes poses a potential mechanistic puzzle. In the promoters of bivalent genes, both active and inactive modifications are simultaneously present on both alleles (<xref ref-type="bibr" rid="bib3">Bernstein et al., 2006</xref>). Moreover, direct evidence showed individual nucleosomes carrying H3K4me3 or H3K36me3 along with H3K27me3, albeit on the opposite histone H3 tails (<xref ref-type="bibr" rid="bib48">Voigt et al., 2012</xref>). At the same time, asymmetric distribution of H3K27me3 and H3K36me3 within the gene body of MAE genes is consistent with a very simple asymmetric model: H3K27me3 is known to be associated with transcriptionally silenced chromatin, and H3K36me3 is associated with gene bodies of the actively transcribed genes (<xref ref-type="bibr" rid="bib7">Black et al., 2012</xref>). This leads us to speculate that transition of the bivalent genes from transcriptionally silent state with ‘poised’ chromatin into an active state often happens independently for the two alleles. In that scenario, either allele can become stably inactive, with gene body chromatin enriched with H3K27me3, while the other becomes active and enriched with H3K36me3 (<xref ref-type="fig" rid="fig6">Figure 6B</xref>). Once the poised state has resolved into active or inactive state, it locks in that state; with some probability, both alleles become active, and the biallelic state is locked. 
While the molecular details of this process remain unclear, this model makes a testable prediction that for bivalent genes, MAE is established during lineage commitment events, such as ES cell differentiation.</p><p>Our finding of a specific combination of molecular markers associated with autosomal MAE creates opportunities for a deeper understanding of mechanisms underlying this epigenetic phenomenon. The presence of H3K27me3 immediately implies that histone methyltransferase <italic>EZH2</italic>, and possibly other components of the PRC2 complex (<xref ref-type="bibr" rid="bib36">Margueron and Reinberg, 2011</xref>), could be involved in MAE maintenance. Our experimental observations suggest that H3K27me3 and H3K36me3 marks are asymmetrically distributed between the alleles of genes with MAE. Our observations are consistent with the idea that a uniform molecular mechanism might be involved in MAE maintenance genome-wide.</p><p>Our study strongly implies that very similar sets of genes are subject to MAE in different individuals: the presence of the H3K36me3/H3K27me3 chromatin pattern in one individual corresponds to MAE in the same cell type in another individual. A parsimonious hypothesis is that the capacity for MAE is genetically controlled by regulatory DNA sequences, presumably in <italic>cis</italic> to the affected MAE genes. Consistent with this notion, significant inter-species conservation exists among genes subject to MAE in human and mouse (<xref ref-type="bibr" rid="bib54">Zwemer et al., 2012</xref>).</p><p>Epigenetic regulation of monoallelic expression can profoundly affect the relationship between genotype and molecular phenotype (<xref ref-type="bibr" rid="bib43">Pereira et al., 2003</xref>; <xref ref-type="bibr" rid="bib53">Zuo et al., 2007</xref>). 
One intriguing possibility is raised by a recent observation that genomically identical clones can be dramatically different in terms of tumorigenic properties and drug resistance (<xref ref-type="bibr" rid="bib26">Kreso et al., 2012</xref>). The knowledge of the chromatin signature of MAE will help in exploring the relationship between MAE and functional cellular states in normal development and disease.</p></sec><sec id="s4" sec-type="materials|methods"><title>Materials and methods</title><sec id="s4-1"><title>Automated classification of MAE</title><sec id="s4-1-1"><title>Data</title><p>The chromatin mark data for the ENCODE cell lines were obtained in wig format from the UCSC genome browser. The chromatin mark data for HMEC and HCC1954 were obtained from Gene Expression Omnibus Series GSE29127 (<xref ref-type="bibr" rid="bib22a">Hon et al., 2012</xref>), and for PBMC were obtained from Gene Expression Omnibus Series GSE16368 (<xref ref-type="bibr" rid="bib4">Bernstein et al., 2010</xref>). The expression data for all ENCODE cell lines, including GM12878, were obtained from UCSC ENCODE DCC, Gene Expression Omnibus Series GSM958730 (GM12878), GSM958737 (H1hESC), GSM958744 (HSMM), GSM958738 (HUVEC) and GSM958731 (K562).</p></sec><sec id="s4-1-2"><title>Feature generation</title><p>To generate features, continuous signal from ChIP-Seq with a given antibody was integrated over proximal promoter (2.5 kb upstream of the transcription start site), or the whole gene body, and normalized to the signal from input (whole cell extract; WCE). The data were processed using in-house <italic>perl</italic> and <italic>awk</italic> scripts. The raw coverage counts of chromatin marks, and input on the promoter and on the gene body were calculated by summing the signal over the respective area of interest. 
The resulting raw amount of each mark was normalized to the input as the log2 ratio of mark to input.</p></sec><sec id="s4-1-3"><title>Classifier training, evaluation and prediction</title><p>Genes were split into two sets: training/development and prediction, where training/development set contained genes with known MAE or biallelic status, and the prediction set contained all genes with unknown status. Additional quantile normalization was performed for HMEC and PBMC datasets, using <italic>matlab</italic>. Our training/development set consisted of 270 high confidence MAE genes and 1068 high confidence BAE genes (with at least four clones showing biallelic expression) (<xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>). We used all genes for which the MAE or biallelic status was ascertained with high confidence to gain maximum information from all available knowledge, including negative examples. Note that not all genes had data for every feature tested in various combinations. For example, after removing genes with no data, there were 266 MAE and 1046 BAE genes with H3K27me3 and H3K36me3, and 706 BAE and 171 MAE genes in the complete feature set.</p><p>We used <italic>Weka 3.7.3</italic> (<xref ref-type="bibr" rid="bib21">Hall et al., 2009</xref>) to train and evaluate classification methods and perform automated feature selection. Initially, the full set of features was provided and greedy hill-climbing approach augmented with a backtracking facility (BestFirst method in Weka: Select Attributes) was used for feature search. The chosen feature subsets were evaluated by considering the individual predictive ability of each feature along with the degree of redundancy between them (CfsSubsetEval method in Weka: Select Attributes). In addition, the features were evaluated individually using information gain with respect to the class (InfoGainAttributeEval in Weka: Select Attributes) and ranked accordingly. 
The top features were added one by one until no significant gain in performance was observed. In practice, the top two features H3K36me3 and H3K27me3 on the gene body were sufficient to achieve performance on par with the full feature set.</p><p>For evaluation purposes, we used 10-fold cross-validation, a procedure by which the training set is divided into 10 random subsets 10 different times, and each time the classifier is trained on nine subsets and tested on the remaining subset. The F-measure (harmonic mean of precision and recall) was taken as a measure of classifier performance. We trained the best classifiers on the full training/development set and saved the models in <italic>Weka</italic> model files.</p><p>For prediction purposes, we ran <italic>Weka</italic> in prediction mode with the saved model files on the prediction set, and obtained the results as CSV files which were processed in Excel (Data reported in Dataset S1 in Dryad, <xref ref-type="bibr" rid="bib41">Nag et al., 2013</xref>).</p><p>The best and most parsimonious classifier was an alternating decision tree on H3K27me3 and H3K36me3 gene body signal, with default parameters (number of boosting iterations 10, searchpath = ‘expand all paths’). An alternating decision tree consists of decision nodes and prediction nodes. ‘Decision nodes’ specify a predicate condition (e.g., log10H3K27me3 &gt; 1). ‘Prediction nodes’ contain a single number. ADTrees always have prediction nodes as both root and leaves. An instance (gene) is classified by the ADTree by summing any prediction nodes that are traversed while following all paths for which all decision nodes are true. 
Varying the boosting iterations between 10 and 20 did not substantially alter the results and produced a decline outside of the range.</p></sec></sec><sec id="s4-2"><title>Expression analysis</title><p>Alignment of RNA-seq data was done using Bowtie2 (<xref ref-type="bibr" rid="bib27">Langmead and Salzberg, 2012</xref>), using paired end alignment, seed length = 28, and max seed mismatches = 2. RPKM was calculated using <italic>cufflinks v. 2</italic>, with a gtf file for <italic>Homo sapiens</italic>, UCSC, hg19 obtained from <ext-link ext-link-type="uri" xlink:href="http://cufflinks.cbcb.umd.edu/igenomes.html">http://cufflinks.cbcb.umd.edu/igenomes.html</ext-link>. For comparison, <italic>cufflinks</italic> was run using the multiple aligned files option simultaneously on all aligned files (bam format) of the ENCODE cell lines. The HMEC and HCC1954 were subsequently run together using the same option separately from the rest and quantile normalization to GM12878 reference was performed on the RPKM values.</p></sec><sec id="s4-3"><title>Extraction of allelic counts</title><p>In-house analysis pipeline in <italic>perl</italic> and <italic>awk</italic> was used to generate an SNP-masked reference and to obtain mapped read counts for each SNP. The reference was derived from the hg19 genome by removing non-transcribed regions using gtf annotation, and masking SNP loci. Long-distance bias effects (<xref ref-type="bibr" rid="bib37">McDaniell et al., 2010</xref>) were also removed using in silico sequencing simulation. The reads covering SNP loci were tallied into maternal and paternal ‘hits’.</p><p>In-house <italic>Matlab</italic> analysis pipeline was used to calculate binomial p-value with FDR-correction and perform equivalence testing for each gene. Allelic bias was statistically identified from the resulting SNP allelic counts. 
As a first step, counts for multiple SNPs spanning the same gene were tallied, since SNPs were never less than one read length apart and each hit was therefore considered the result of an independent Bernoulli trial of a random variable representing gene bias, with complete maternal bias corresponding to parameter value 0, lack of bias to value 0.5, and complete paternal bias to value 1.</p><p>In processing AST-seq data, which had very high coverage per SNP (generally &gt;10<sup>3</sup>), we used equivalence testing and rejection of equivalence as the test for BAE and MAE with equivalence boundaries of 2:1 bias (corresponding to the maximum likelihood estimate parameter value of less than 0.33 or more than 0.67). Due to the much lower coverage in the whole genome RNA-seq experiment, we used FDR-corrected binomial testing of the tallied counts for each of the heterozygous genes for which hits were obtained, along with a threshold of 2:1 bias (corresponding to the maximum likelihood estimate parameter value of less than 0.33 or more than 0.67) to filter out statistically significant but biologically weak bias. To call unbiased (biallelic) genes in each clone, we used equivalence testing, with equivalence boundaries corresponding to the above 2:1 bias. While the use of 2:1 bias as threshold is conventional (<xref ref-type="bibr" rid="bib54">Zwemer et al., 2012</xref>; <xref ref-type="bibr" rid="bib28">Li et al., 2012a</xref>), the results remained qualitatively robust with other thresholds (e.g., 3:1) used. 
This was due to the fact that a disproportionate fraction of MAE genes showed extreme biases.</p><p>In the analysis of data from the multiplex capture of common coding SNPs using padlock probes (see below), we restricted the analysis to SNPs with 100 reads or more.</p><sec id="s4-3-1"><title>Gene set enrichment analysis</title><p>Gene set enrichment analysis was performed using the GeneTrail online tool with methodology described in <xref ref-type="bibr" rid="bib24">Keller et al. (2007)</xref>.</p></sec></sec><sec id="s4-4"><title>Cell lines</title><sec id="s4-4-1"><title>Cell culture</title><p>GM12878 cell line was obtained from Coriell Cell Repositories. GM13130 polyclonal cell line and GM13130 clones were described before (<xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>).</p></sec><sec id="s4-4-2"><title>Single cell cloning</title><p>Fluorescence Activated Cell Sorting (FACS) of single live GM12878 cells was performed following Propidium Iodide (PI) staining (5 µg/M cells). We used two conditions for culturing sorted single cells: (a) 50% conditioned media and (b) Mitomycin C (10 and 50 µg/ml) inactivated feeder cells. To control for possible escape of feeder cells from cell-cycle arrest, we used as feeders lymphoblasts from a different individual (GM13130) and genotyped all clones to ensure their identity. Clone expansion was evaluated 21–24 days post sorting.</p></sec><sec id="s4-4-3"><title>Establishing independence of clones</title><p>To identify clones that had unique rearrangement in Igκ or Igλ locus, we performed degenerate RT-PCR as previously described (<xref ref-type="bibr" rid="bib49">Wardemann et al., 2003</xref>).</p></sec><sec id="s4-4-4"><title>Assessment of genome integrity</title><p>To assess for gross genomic abnormalities, unique clones were subjected to metaphase spread analysis according to standard protocol (<xref ref-type="bibr" rid="bib11">Deng et al., 2003</xref>). 
To assess for smaller-scale changes, we performed SNP6.0 genotyping and analysis from these clones. Briefly, DNA was extracted from the clones using Qiagen blood mini kit and was subjected to SNP6.0 genotyping according to standard protocol at Vanderbilt Microarray Shared Resource (<ext-link ext-link-type="uri" xlink:href="http://array.mc.vanderbilt.edu/">http://array.mc.vanderbilt.edu/</ext-link>). The data were analyzed using standard <italic>Birdseed</italic> pipeline (<xref ref-type="bibr" rid="bib25">Korn et al., 2008</xref>).</p></sec></sec><sec id="s4-5"><title>Deep sequencing</title><sec id="s4-5-1"><title>Strand specific genome wide RNA sequencing</title><p>Strand-specific RNA-sequencing library was prepared according to <xref ref-type="bibr" rid="bib42">Parkhomchuk et al. (2009)</xref>. Briefly, total RNA was extracted using Trizol reagent (Life Technologies, Carlsbad, CA) using standard protocol. Total RNA was subjected to polyA selection using Poly(A)Purist MAG Kit (Life Technologies). This RNA was DNase treated followed by first strand synthesis in the presence of Actinomycin D and second strand synthesis in the presence of dUTP instead of dTTP. Sheared cDNA (Covaris Inc, Woburn, MA) was end-repaired and subsequently adenylated. Adapters were ligated, and the products were run on a gel to cut and elute the pieces of desired size. The eluted DNA was subjected to a treatment by USER enzyme (NEB, Ipswich, MA) to remove the second strand. The resulting mixture was PCR amplified (using primers recommended by Illumina but synthesized by IDT, Coralville, IA) using Phusion High-Fidelity DNA Polymerase (NEB) for 15 cycles followed by agarose gel purification of the band. The library was sequenced using Illumina HiSeq 2000 platform.</p></sec><sec id="s4-5-2"><title>AST-Seq</title><p>Each of the gene-specific primers flanking the targeted SNP had one of two universal tails (F tail: 5′GCG TAC CAC GTG TCG ACT or R tail: 5′GAC GGG CGT ACT AGC GTA). 
Second round of PCR used these universal tails to introduce unique combinations of 6-nucleotide ‘barcode’ sequences. Genomic DNA (gDNA) was isolated using Qiagen Blood kit. RNA was isolated using Trizol reagent (Life Technologies) using standard protocol. DNase-treated (Turbo, Ambion, Life Technologies) RNA was used to prepare cDNA using SuperScriptIII (Life Technologies). PCR was performed using Klear Taq (KBioscience, UK) (95°C, 15 min enzyme activation followed by 35 cycles of 95°C, 30 s denaturation; 62°C, 30 s annealing; 68°C, 90 s extension; and a final extension of 72°C for 5 min). After two rounds of PCR, barcoded amplicons were mixed in equal amount for preparation of the single library. The mixed amplicons were size-selected on agarose gel, end repaired, adenylated, and finally adapters were ligated. The resulting DNA fragments of desired size range were purified on agarose gel, and pre-amplified for 15 cycles. The library was mixed with high-complexity libraries (20–30%) and subjected to paired-end or single-end sequencing for 100 cycles on Illumina HiSeq 2000. Primer sequences are listed in Dataset S3 in Dryad (<xref ref-type="bibr" rid="bib41">Nag et al., 2013</xref>).</p></sec><sec id="s4-5-3"><title>Chromatin immunoprecipitation</title><p>ChIP was performed as described (<xref ref-type="bibr" rid="bib3">Bernstein et al., 2006</xref>; <xref ref-type="bibr" rid="bib39">Mikkelsen et al., 2007</xref>). Chromatin shearing was performed using Covaris system. 
The antibodies used were ABE44 (Millipore, Billerica, MA) for H3K27me3 and AB9050 (Abcam, Cambridge, MA and UK) for H3K36me3.</p></sec></sec><sec id="s4-6"><title>Multiplex capture of common coding SNPs</title><sec id="s4-6-1"><title>Experimental method</title><p>To capture the targeted coding SNPs, mixture containing Padlock probes targeting 36,456 common coding SNPs and 200 ng of genomic DNA (or cDNA, ChIPed-DNA) was prepared in 1X Ampligase Buffer (Epicentre, Madison, WI) (<xref ref-type="bibr" rid="bib52">Zhang et al., 2009</xref>). The mixture was denatured at 95°C, gradually cooled and then hybridized at 60°C for 24 hr. The product was circularized after adding AmpliTaq Stoffel (Life Technologies), and Ampligase (Epicentre), in the presence of dNTPs, and incubating at 60°C for 18 hr. Following this, exonuclease I (USB, Affymetrix, Santa Clara, CA) and exonuclease III (USB) were added to cleave the linear DNA by incubating at 37°C for 2 hr and followed by heat inactivation at 90°C. The circularization product was amplified using primer AmpF6.3Sol: AATGATACGGCGACCACCGACACTCTCAGATGTTATCGAGGTCCGAC, AmpR6.3Ind in Kapa SYBR FAST qPCR master mix (Kapa Biosystems, Wilmington, MA) using the following program: 95°C for 2 min, and then six cycles at 98°C for 20 s, 58°C for 20 s, 72°C for 20 s followed by 20 cycles at 98°C for 10 s and 72°C for 20 s. PCR reaction was terminated before the real time PCR curve reached plateau to avoid over-amplification. The amplicons were purified using Qiaquick PCR purification kit (Qiagen, Netherlands), and the products in the expected size range (180 bp) were selected using polyacrylamide (PAGE) gel. 
The libraries were sequenced on an Illumina Genome Analyzer IIx.</p></sec><sec id="s4-6-2"><title>Read mapping and extraction of allelic counts</title><p>Sequencing reads generated were trimmed 9 bp from both ends, mapped to the human genome (hg19) with <italic>bwa</italic> (<xref ref-type="bibr" rid="bib29">Li and Durbin, 2009</xref>), then quality-recalibrated and locally re-aligned with GATK. Heterozygous SNPs were identified from genomic DNA using GATK, and the allelic counts at these sites were extracted from the <italic>bam</italic> files using Samtools (<xref ref-type="bibr" rid="bib30">Li et al., 2009</xref>).</p></sec><sec id="s4-6-3"><title>Method note 1: relationship of RPKM value and number of transcripts per cell</title><p>We estimate that in lymphoblastoid cells RPKM of 0.1 corresponds to about 0.5–1 transcript per cell. We based our gene expression cutoff of 0.1 RPKM for expressed genes on several considerations: our experimental cell lines are lymphoblastoid, which are smaller than average sized cells. While 100 ng of RNA of cells used by (<xref ref-type="bibr" rid="bib40">Mortazavi et al., 2008</xref>) correspond to approximately 10<sup>3</sup> cells, we require between 1–3 × 10<sup>4</sup> cells, to obtain the same amount of RNA. Hence, if (<xref ref-type="bibr" rid="bib40">Mortazavi et al., 2008</xref>) calculate that RPKM = 1 corresponds to approximately one transcript per cell based on their spike-in data, we adjust the empirical curve by a factor of 10 to obtain 0.1 RPKM to stand for approximately one transcript per cell. In addition, we note that while only 9% of genes with RPKM of 0 in the data for the polyclonal line are detected in our clonal lines, 50% of the genes with 0.1 RPKM are detected (<xref ref-type="bibr" rid="bib46">Rozowsky et al., 2011</xref>). This means that we have direct evidence that at least half of the genes at that level are represented at one transcript per every two (clonal) cells. 
Given that detection is not perfect, we believe this to be a very conservative estimate. Last but not least, 0.1 RPKM as a detection threshold for whole transcriptome analysis is a widely accepted method (<xref ref-type="bibr" rid="bib33">Lundberg et al., 2010</xref>; <xref ref-type="bibr" rid="bib47">Uhlen et al., 2010</xref>).</p></sec><sec id="s4-6-4"><title>Method note 2: choice of genes for AST-seq validation</title><p>The genes were chosen based on several criteria. First we created a list of genes that had heterozygous SNPs in the primary transcript in both the GM12878 and GM13130 lines. We then looked at dataset generated by the Wold and Gerstein labs on the GM12878 individual to create a subset of genes that were expressed in this cell type. We had very few genes in the predicted MAE table that had heterozygous SNPs in the primary transcript in both the cell lines and were expressed at sufficient level. Many MAE genes chosen also had multiple SNPs per transcript. The BAE genes to be tested were then chosen randomly from the resulting table. 
These were the genes that were NOT predicted to be MAE.</p></sec></sec></sec></body><back><ack id="ack"><title>Acknowledgements</title><p>We thank A Bortvin, M Kuroda, R Medzhitov, F Winston, and members of the Gimelbrant lab for critical comments on the manuscript, A Alekseenko, B Bernstein, and A Regev for helpful technical suggestions, and SA Gimelbrant, A Landry, JB Lazaro and R Issner for technical help.</p></ack><sec sec-type="additional-information"><title>Additional information</title><fn-group content-type="competing-interest"><title>Competing interests</title><fn fn-type="conflict" id="conf1"><p>The authors declare that no competing interests exist.</p></fn></fn-group><fn-group content-type="author-contribution"><title>Author contributions</title><fn fn-type="con" id="con1"><p>AN, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article</p></fn><fn fn-type="con" id="con2"><p>VS, Conception and design, Acquisition of data, Analysis and interpretation of data, Drafting or revising the article</p></fn><fn fn-type="con" id="con3"><p>H-LF, Acquisition of data, Analysis and interpretation of data</p></fn><fn fn-type="con" id="con4"><p>AM, Acquisition of data, Analysis and interpretation of data</p></fn><fn fn-type="con" id="con5"><p>G-CY, Critical reading of the article, Analysis and interpretation of data</p></fn><fn fn-type="con" id="con6"><p>KZ, Acquisition of data, Contributed unpublished essential data or reagents</p></fn><fn fn-type="con" id="con7"><p>AAG, Conception and design, Analysis and interpretation of data, Drafting or revising the article</p></fn></fn-group></sec><sec sec-type="supplementary-material"><title>Additional files</title><sec sec-type="datasets"><title>Major dataset</title><p>The following datasets were generated:</p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" 
id="dataro1"><name><surname>Nag</surname><given-names>A</given-names></name>, <name><surname>Savova</surname><given-names>V</given-names></name>, <name><surname>Fung</surname><given-names>H</given-names></name>, <name><surname>Miron</surname><given-names>A</given-names></name>, <name><surname>Yuan</surname><given-names>G</given-names></name>, <name><surname>Zhang</surname><given-names>K</given-names></name>, <name><surname>Gimelbrant</surname><given-names>AA</given-names></name>, <year>2013</year><x>, </x><source>Data from: Chromatin signature of monoallelic expression</source><x>, </x><ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.5061/dryad.1775k">10.5061/dryad.1775k</ext-link><x>, </x><comment>Publicly available at Dryad (<ext-link ext-link-type="uri" xlink:href="http://datadryad.org/">http://datadryad.org/</ext-link>). Dataset S1, MAE predictions for cell lines; Dataset S2, Result of whole RNA-seq on clones of GM12878 lymphoblastoid clones DF1 and DF2; Dataset S3, AST-seq data; Dataset S4, Results of the experiment with padlock probe.</comment></related-object></p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro2"><name><surname>Nag</surname><given-names>A</given-names></name>, <name><surname>Savova</surname><given-names>V</given-names></name>, <name><surname>Gimelbrant</surname><given-names>AA</given-names></name>, <year>2013</year><x>, </x><source>Whole RNA-seq on clones of GM12878 lymphoblastoid clones DF1 and DF2</source><x>, </x><object-id pub-id-type="art-access-id">GSE52090</object-id><x>; </x><ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE52090">http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE52090</ext-link><x>, </x><comment>Publicly available at GEO (<ext-link ext-link-type="uri" 
xlink:href="http://www.ncbi.nlm.nih.gov/geo/">http://www.ncbi.nlm.nih.gov/geo/</ext-link>)</comment></related-object></p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro3"><name><surname>Fung</surname><given-names>H-L</given-names></name>, <name><surname>Zhang</surname><given-names>K</given-names></name>, <name><surname>Nag</surname><given-names>A</given-names></name>, <name><surname>Savova</surname><given-names>V</given-names></name>, <name><surname>Gimelbrant</surname><given-names>AA</given-names></name>, <year>2013</year><x>, </x><source>Data for allelic bias analysis in gDNA, cDNA and ChIP with H3K27me3 and H3K36me3 antibodies with multiplexed padlock probe approach</source><x>, </x><object-id pub-id-type="art-access-id">GSE53628</object-id><x>; </x><ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE53628">http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE53628</ext-link><x>, </x><comment>Publicly available at GEO (<ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/geo/">http://www.ncbi.nlm.nih.gov/geo/</ext-link>)</comment></related-object></p><p>The following previously published datasets were used:</p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro4"><name><surname>Dunham</surname><given-names>I</given-names></name>, <name><surname>Kundaje</surname><given-names>A</given-names></name>, <name><surname>Aldred</surname><given-names>SF</given-names></name>, <name><surname>Collins</surname><given-names>PJ</given-names></name>, <name><surname>Davis</surname><given-names>CA</given-names></name>, <name><surname>Doyle</surname><given-names>F</given-names></name>, <etal/>, <year>2012</year><x>, </x><source>Data from: An integrated encyclopedia of DNA elements in the human genome</source><x>, </x><ext-link 
ext-link-type="uri" xlink:href="http://genome.ucsc.edu/ENCODE">http://genome.ucsc.edu/ENCODE</ext-link>; <ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/geo/info/ENCODE.html">http://www.ncbi.nlm.nih.gov/geo/info/ENCODE.html</ext-link><x>, </x><comment>Publicly available at GEO (<ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/geo/">http://www.ncbi.nlm.nih.gov/geo/</ext-link>).</comment></related-object></p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro5"><name><surname>Bernstein</surname><given-names>BE</given-names></name>, <name><surname>Stamatoyannopoulos</surname><given-names>JA</given-names></name>, <name><surname>Costello</surname><given-names>JF</given-names></name>, <name><surname>Ren</surname><given-names>B</given-names></name>, <name><surname>Milosavljevic</surname><given-names>A</given-names></name>, <name><surname>Meissner</surname><given-names>A</given-names></name>, <etal/>, <year>2012</year><x>, </x><source>Data from: The NIH Roadmap Epigenomics Mapping Consortium</source><x>, </x><object-id pub-id-type="art-access-id">GSE16368</object-id><x>; </x><ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE16368">http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE16368</ext-link><x>, </x><comment>Publicly available at GEO (<ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/geo/">http://www.ncbi.nlm.nih.gov/geo/</ext-link>).</comment></related-object></p><p><related-object content-type="generated-dataset" document-id="Dataset ID and/or url" document-id-type="dataset" document-type="data" id="dataro6"><name><surname>Hon</surname><given-names>GC</given-names></name>, <name><surname>Hawkins</surname><given-names>RD</given-names></name>, <name><surname>Caballero</surname><given-names>OL</given-names></name>, 
<name><surname>Lo</surname><given-names>C</given-names></name>, <etal/>, <year>2012</year><x>, </x><source>Global DNA hypomethylation coupled to repressive chromatin domain formation and gene silencing in breast cancer</source><x>, </x><object-id pub-id-type="art-access-id">GSE29127</object-id><x>; </x><ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE29127">http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE29127</ext-link><x>, </x><comment>Publicly available at GEO (<ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/geo/">http://www.ncbi.nlm.nih.gov/geo/</ext-link>).</comment></related-object></p></sec></sec><ref-list><title>References</title><ref id="bib1"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Barski</surname><given-names>A</given-names></name><name><surname>Cuddapah</surname><given-names>S</given-names></name><name><surname>Cui</surname><given-names>K</given-names></name><name><surname>Roh</surname><given-names>TY</given-names></name><name><surname>Schones</surname><given-names>DE</given-names></name><name><surname>Wang</surname><given-names>Z</given-names></name><name><surname>Wei</surname><given-names>G</given-names></name><name><surname>Chepelev</surname><given-names>I</given-names></name><name><surname>Zhao</surname><given-names>K</given-names></name></person-group><year>2007</year><article-title>High-resolution profiling of histone methylations in the human genome</article-title><source>Cell</source><volume>129</volume><fpage>823</fpage><lpage>837</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2007.05.009</pub-id></element-citation></ref><ref id="bib2"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Berletch</surname><given-names>JB</given-names></name><name><surname>Yang</surname><given-names>F</given-names></name><name><surname>Disteche</surname><given-names>CM</given-names></name></person-group><year>2010</year><article-title>Escape from X inactivation in mice and humans</article-title><source>Genome Biology</source><volume>11</volume><fpage>213</fpage><pub-id pub-id-type="doi">10.1186/gb-2010-11-6-213</pub-id></element-citation></ref><ref id="bib3"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bernstein</surname><given-names>BE</given-names></name><name><surname>Mikkelsen</surname><given-names>TS</given-names></name><name><surname>Xie</surname><given-names>X</given-names></name><name><surname>Kamal</surname><given-names>M</given-names></name><name><surname>Huebert</surname><given-names>DJ</given-names></name><name><surname>Cuff</surname><given-names>J</given-names></name><name><surname>Fry</surname><given-names>B</given-names></name><name><surname>Meissner</surname><given-names>A</given-names></name><name><surname>Wernig</surname><given-names>M</given-names></name><name><surname>Plath</surname><given-names>K</given-names></name><name><surname>Jaenisch</surname><given-names>R</given-names></name><name><surname>Wagschal</surname><given-names>A</given-names></name><name><surname>Feil</surname><given-names>R</given-names></name><name><surname>Schreiber</surname><given-names>SL</given-names></name><name><surname>Lander</surname><given-names>ES</given-names></name></person-group><year>2006</year><article-title>A bivalent chromatin structure marks key developmental genes in embryonic stem cells</article-title><source>Cell</source><volume>125</volume><fpage>315</fpage><lpage>326</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2006.02.041</pub-id></element-citation></ref><ref id="bib4"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Bernstein</surname><given-names>BE</given-names></name><name><surname>Stamatoyannopoulos</surname><given-names>JA</given-names></name><name><surname>Costello</surname><given-names>JF</given-names></name><name><surname>Ren</surname><given-names>B</given-names></name><name><surname>Milosavljevic</surname><given-names>A</given-names></name><name><surname>Meissner</surname><given-names>A</given-names></name><name><surname>Kellis</surname><given-names>M</given-names></name><name><surname>Marra</surname><given-names>MA</given-names></name><name><surname>Beaudet</surname><given-names>AL</given-names></name><name><surname>Ecker</surname><given-names>JR</given-names></name><name><surname>Farnham</surname><given-names>PJ</given-names></name><name><surname>Hirst</surname><given-names>M</given-names></name><name><surname>Lander</surname><given-names>ES</given-names></name><name><surname>Mikkelsen</surname><given-names>TS</given-names></name><name><surname>Thomson</surname><given-names>JA</given-names></name></person-group><year>2010</year><article-title>The NIH Roadmap Epigenomics Mapping Consortium</article-title><source>Nature Biotechnology</source><volume>28</volume><fpage>1045</fpage><lpage>1048</lpage><pub-id pub-id-type="doi">10.1038/nbt1010-1045</pub-id></element-citation></ref><ref id="bib5"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bertram</surname><given-names>L</given-names></name><name><surname>Tanzi</surname><given-names>RE</given-names></name></person-group><year>2012</year><article-title>The genetics of Alzheimer’s disease</article-title><source>Progress in Molecular Biology and Translational Science</source><volume>107</volume><fpage>79</fpage><lpage>100</lpage><pub-id pub-id-type="doi">10.1016/B978-0-12-385883-2.00008-4</pub-id></element-citation></ref><ref id="bib6"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Bix</surname><given-names>M</given-names></name><name><surname>Locksley</surname><given-names>RM</given-names></name></person-group><year>1998</year><article-title>Independent and epigenetic regulation of the interleukin-4 alleles in CD4+ T cells</article-title><source>Science</source><volume>281</volume><fpage>1352</fpage><lpage>1354</lpage><pub-id pub-id-type="doi">10.1126/science.281.5381.1352</pub-id></element-citation></ref><ref id="bib7"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Black</surname><given-names>JC</given-names></name><name><surname>Van Rechem</surname><given-names>C</given-names></name><name><surname>Whetstine</surname><given-names>JR</given-names></name></person-group><year>2012</year><article-title>Histone lysine methylation dynamics: establishment, regulation, and biological impact</article-title><source>Molecular Cell</source><volume>48</volume><fpage>491</fpage><lpage>507</lpage><pub-id pub-id-type="doi">10.1016/j.molcel.2012.11.006</pub-id></element-citation></ref><ref id="bib8"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Carrel</surname><given-names>L</given-names></name><name><surname>Willard</surname><given-names>HF</given-names></name></person-group><year>2005</year><article-title>X-inactivation profile reveals extensive variability in X-linked gene expression in females</article-title><source>Nature</source><volume>434</volume><fpage>400</fpage><lpage>404</lpage><pub-id pub-id-type="doi">10.1038/nature03479</pub-id></element-citation></ref><ref id="bib9"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Chess</surname><given-names>A</given-names></name><name><surname>Simon</surname><given-names>I</given-names></name><name><surname>Cedar</surname><given-names>H</given-names></name><name><surname>Axel</surname><given-names>R</given-names></name></person-group><year>1994</year><article-title>Allelic inactivation regulates olfactory receptor gene expression</article-title><source>Cell</source><volume>78</volume><fpage>823</fpage><lpage>834</lpage><pub-id pub-id-type="doi">10.1016/S0092-8674(94)90562-2</pub-id></element-citation></ref><ref id="bib11"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Deng</surname><given-names>W</given-names></name><name><surname>Tsao</surname><given-names>SW</given-names></name><name><surname>Lucas</surname><given-names>JN</given-names></name><name><surname>Leung</surname><given-names>CS</given-names></name><name><surname>Cheung</surname><given-names>AL</given-names></name></person-group><year>2003</year><article-title>A new method for improving metaphase chromosome spreading</article-title><source>Cytometry Part A: the Journal of the International Society for Analytical Cytology</source><volume>51</volume><fpage>46</fpage><lpage>51</lpage><pub-id pub-id-type="doi">10.1002/cyto.a.10004</pub-id></element-citation></ref><ref id="bib12"><element-citation publication-type="journal"><collab>ENCODE Project Consortium</collab><person-group person-group-type="author"><name><surname>Bernstein</surname><given-names>BE</given-names></name><name><surname>Birney</surname><given-names>E</given-names></name><name><surname>Dunham</surname><given-names>I</given-names></name><name><surname>Green</surname><given-names>ED</given-names></name><name><surname>Gunter</surname><given-names>C</given-names></name><name><surname>Snyder</surname><given-names>M</given-names></name></person-group><year>2012</year><article-title>An integrated encyclopedia of DNA elements in 
the human genome</article-title><source>Nature</source><volume>489</volume><fpage>57</fpage><lpage>74</lpage><pub-id pub-id-type="doi">10.1038/nature11247</pub-id></element-citation></ref><ref id="bib13"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ernst</surname><given-names>J</given-names></name><name><surname>Kellis</surname><given-names>M</given-names></name></person-group><year>2010</year><article-title>Discovery and characterization of chromatin states for systematic annotation of the human genome</article-title><source>Nature Biotechnology</source><volume>28</volume><fpage>817</fpage><lpage>825</lpage><pub-id pub-id-type="doi">10.1038/nbt.1662</pub-id></element-citation></ref><ref id="bib14"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Farago</surname><given-names>M</given-names></name><name><surname>Rosenbluh</surname><given-names>C</given-names></name><name><surname>Tevlin</surname><given-names>M</given-names></name><name><surname>Fraenkel</surname><given-names>S</given-names></name><name><surname>Schlesinger</surname><given-names>S</given-names></name><name><surname>Masika</surname><given-names>H</given-names></name><name><surname>Gouzman</surname><given-names>M</given-names></name><name><surname>Teng</surname><given-names>G</given-names></name><name><surname>Schatz</surname><given-names>D</given-names></name><name><surname>Rais</surname><given-names>Y</given-names></name><name><surname>Hanna</surname><given-names>JH</given-names></name><name><surname>Mildner</surname><given-names>A</given-names></name><name><surname>Jung</surname><given-names>S</given-names></name><name><surname>Mostoslavsky</surname><given-names>G</given-names></name><name><surname>Cedar</surname><given-names>H</given-names></name><name><surname>Bergman</surname><given-names>Y</given-names></name></person-group><year>2012</year><article-title>Clonal allelic predetermination of 
immunoglobulin-κ rearrangement</article-title><source>Nature</source><volume>490</volume><fpage>561</fpage><lpage>565</lpage><pub-id pub-id-type="doi">10.1038/nature11496</pub-id></element-citation></ref><ref id="bib15"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Filion</surname><given-names>GJ</given-names></name><name><surname>van Bemmel</surname><given-names>JG</given-names></name><name><surname>Braunschweig</surname><given-names>U</given-names></name><name><surname>Talhout</surname><given-names>W</given-names></name><name><surname>Kind</surname><given-names>J</given-names></name><name><surname>Ward</surname><given-names>LD</given-names></name><name><surname>Brugman</surname><given-names>W</given-names></name><name><surname>de Castro</surname><given-names>IJ</given-names></name><name><surname>Kerkhoven</surname><given-names>RM</given-names></name><name><surname>Bussemaker</surname><given-names>HJ</given-names></name><name><surname>van Steensel</surname><given-names>B</given-names></name></person-group><year>2010</year><article-title>Systematic protein location mapping reveals five principal chromatin types in <italic>Drosophila</italic> cells</article-title><source>Cell</source><volume>143</volume><fpage>212</fpage><lpage>224</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2010.09.009</pub-id></element-citation></ref><ref id="bib16"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gilad</surname><given-names>Y</given-names></name><name><surname>Rifkin</surname><given-names>SA</given-names></name><name><surname>Pritchard</surname><given-names>JK</given-names></name></person-group><year>2008</year><article-title>Revealing the architecture of gene regulation: the promise of eQTL studies</article-title><source>Trends in Genetics: TIG</source><volume>24</volume><fpage>408</fpage><lpage>415</lpage><pub-id 
pub-id-type="doi">10.1016/j.tig.2008.06.001</pub-id></element-citation></ref><ref id="bib17"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gimelbrant</surname><given-names>A</given-names></name><name><surname>Hutchinson</surname><given-names>JN</given-names></name><name><surname>Thompson</surname><given-names>BR</given-names></name><name><surname>Chess</surname><given-names>A</given-names></name></person-group><year>2007</year><article-title>Widespread monoallelic expression on human autosomes</article-title><source>Science</source><volume>318</volume><fpage>1136</fpage><lpage>1140</lpage><pub-id pub-id-type="doi">10.1126/science.1148910</pub-id></element-citation></ref><ref id="bib18"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gimelbrant</surname><given-names>AA</given-names></name><name><surname>Chess</surname><given-names>A</given-names></name></person-group><year>2006</year><article-title>An epigenetic state associated with areas of gene duplication</article-title><source>Genome Research</source><volume>16</volume><fpage>723</fpage><lpage>729</lpage><pub-id pub-id-type="doi">10.1101/gr.5023706</pub-id></element-citation></ref><ref id="bib19"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gimelbrant</surname><given-names>AA</given-names></name><name><surname>Ensminger</surname><given-names>AW</given-names></name><name><surname>Qi</surname><given-names>P</given-names></name><name><surname>Zucker</surname><given-names>J</given-names></name><name><surname>Chess</surname><given-names>A</given-names></name></person-group><year>2005</year><article-title>Monoallelic expression and asynchronous replication of p120 catenin in mouse and human cells</article-title><source>The Journal of Biological Chemistry</source><volume>280</volume><fpage>1354</fpage><lpage>1359</lpage><pub-id 
pub-id-type="doi">10.1074/jbc.M411283200</pub-id></element-citation></ref><ref id="bib20"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Glaser</surname><given-names>RL</given-names></name><name><surname>Ramsay</surname><given-names>JP</given-names></name><name><surname>Morison</surname><given-names>IM</given-names></name></person-group><year>2006</year><article-title>The imprinted gene and parent-of-origin effect database now includes parental origin of de novo mutations</article-title><source>Nucleic Acids Research</source><volume>34</volume><fpage>D29</fpage><lpage>D31</lpage><pub-id pub-id-type="doi">10.1093/nar/gkj101</pub-id></element-citation></ref><ref id="bib21"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hall</surname><given-names>M</given-names></name><name><surname>Frank</surname><given-names>E</given-names></name><name><surname>Holmes</surname><given-names>G</given-names></name><name><surname>Pfahringer</surname><given-names>B</given-names></name><name><surname>Reutemann</surname><given-names>P</given-names></name><name><surname>Witten</surname><given-names>IH</given-names></name></person-group><year>2009</year><article-title>The WEKA data mining software: an update</article-title><source>SIGKDD Explorations</source><volume>11</volume><fpage>10</fpage><lpage>18</lpage><pub-id pub-id-type="doi">10.1145/1656274.1656278</pub-id></element-citation></ref><ref id="bib22"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Holländer</surname><given-names>GA</given-names></name><name><surname>Zuklys</surname><given-names>S</given-names></name><name><surname>Morel</surname><given-names>C</given-names></name><name><surname>Mizoguchi</surname><given-names>E</given-names></name><name><surname>Mobisson</surname><given-names>K</given-names></name><name><surname>Simpson</surname><given-names>S</given-names></name><name><surname>Terhorst</surname><given-names>C</given-names></name><name><surname>Wishart</surname><given-names>W</given-names></name><name><surname>Golan</surname><given-names>DE</given-names></name><name><surname>Bhan</surname><given-names>AK</given-names></name><name><surname>Burakoff</surname><given-names>SJ</given-names></name></person-group><year>1998</year><article-title>Monoallelic expression of the interleukin-2 locus</article-title><source>Science</source><volume>279</volume><fpage>2118</fpage><lpage>2121</lpage><pub-id pub-id-type="doi">10.1126/science.279.5359.2118</pub-id></element-citation></ref><ref id="bib22a"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Hon</surname><given-names>GC</given-names></name><name><surname>Hawkins</surname><given-names>RD</given-names></name><name><surname>Caballero</surname><given-names>OL</given-names></name><name><surname>Lo</surname><given-names>C</given-names></name><name><surname>Lister</surname><given-names>R</given-names></name><name><surname>Pelizzola</surname><given-names>M</given-names></name><name><surname>Valsesia</surname><given-names>A</given-names></name><name><surname>Ye</surname><given-names>Z</given-names></name><name><surname>Kuan</surname><given-names>S</given-names></name><name><surname>Edsall</surname><given-names>LE</given-names></name><name><surname>Camargo</surname><given-names>AA</given-names></name><name><surname>Stevenson</surname><given-names>BJ</given-names></name><name><surname>Ecker</surname><given-names>JR</given-names></name><name><surname>Bafna</surname><given-names>V</given-names></name><name><surname>Strausberg</surname><given-names>RL</given-names></name><name><surname>Simpson</surname><given-names>AJ</given-names></name><name><surname>Ren</surname><given-names>B</given-names></name></person-group><year>2012</year><article-title>Global DNA hypomethylation coupled to repressive chromatin domain formation and gene silencing in breast cancer</article-title><source>Genome Research</source><volume>22</volume><fpage>246</fpage><lpage>258</lpage><pub-id pub-id-type="doi">10.1101/gr.125872.111</pub-id></element-citation></ref><ref id="bib23"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Jeffries</surname><given-names>AR</given-names></name><name><surname>Perfect</surname><given-names>LW</given-names></name><name><surname>Ledderose</surname><given-names>J</given-names></name><name><surname>Schalkwyk</surname><given-names>LC</given-names></name><name><surname>Bray</surname><given-names>NJ</given-names></name><name><surname>Mill</surname><given-names>J</given-names></name><name><surname>Price</surname><given-names>J</given-names></name></person-group><year>2012</year><article-title>Stochastic choice of allelic expression in human neural stem cells</article-title><source>Stem Cells</source><volume>30</volume><fpage>1938</fpage><lpage>1947</lpage><pub-id pub-id-type="doi">10.1002/stem.1155</pub-id></element-citation></ref><ref id="bib24"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Keller</surname><given-names>A</given-names></name><name><surname>Backes</surname><given-names>C</given-names></name><name><surname>Lenhof</surname><given-names>HP</given-names></name></person-group><year>2007</year><article-title>Computation of significance scores of unweighted Gene Set Enrichment Analyses</article-title><source>BMC Bioinformatics</source><volume>8</volume><fpage>290</fpage><pub-id pub-id-type="doi">10.1186/1471-2105-8-290</pub-id></element-citation></ref><ref id="bib25"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Korn</surname><given-names>JM</given-names></name><name><surname>Kuruvilla</surname><given-names>FG</given-names></name><name><surname>McCarroll</surname><given-names>SA</given-names></name><name><surname>Wysoker</surname><given-names>A</given-names></name><name><surname>Nemesh</surname><given-names>J</given-names></name><name><surname>Cawley</surname><given-names>S</given-names></name><name><surname>Hubbell</surname><given-names>E</given-names></name><name><surname>Veitch</surname><given-names>J</given-names></name><name><surname>Collins</surname><given-names>PJ</given-names></name><name><surname>Darvishi</surname><given-names>K</given-names></name><name><surname>Lee</surname><given-names>C</given-names></name><name><surname>Nizzari</surname><given-names>MM</given-names></name><name><surname>Gabriel</surname><given-names>SB</given-names></name><name><surname>Purcell</surname><given-names>S</given-names></name><name><surname>Daly</surname><given-names>MJ</given-names></name><name><surname>Altshuler</surname><given-names>D</given-names></name></person-group><year>2008</year><article-title>Integrated genotype calling and association analysis of SNPs, common copy number polymorphisms and rare CNVs</article-title><source>Nature Genetics</source><volume>40</volume><fpage>1253</fpage><lpage>1260</lpage><pub-id pub-id-type="doi">10.1038/ng.237</pub-id></element-citation></ref><ref id="bib26"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kreso</surname><given-names>A</given-names></name><name><surname>O’Brien</surname><given-names>CA</given-names></name><name><surname>van 
Galen</surname><given-names>P</given-names></name><name><surname>Gan</surname><given-names>O</given-names></name><name><surname>Notta</surname><given-names>F</given-names></name><name><surname>Brown</surname><given-names>AM</given-names></name><name><surname>Ng</surname><given-names>K</given-names></name><name><surname>Ma</surname><given-names>J</given-names></name><name><surname>Wienholds</surname><given-names>E</given-names></name><name><surname>Dunant</surname><given-names>C</given-names></name><name><surname>Pollett</surname><given-names>A</given-names></name><name><surname>Gallinger</surname><given-names>S</given-names></name><name><surname>McPherson</surname><given-names>J</given-names></name><name><surname>Mullighan</surname><given-names>CG</given-names></name><name><surname>Shibata</surname><given-names>D</given-names></name><name><surname>Dick</surname><given-names>JE</given-names></name></person-group><year>2012</year><article-title>Variable clonal repopulation dynamics influence chemotherapy response in colorectal cancer</article-title><source>Science</source><volume>339</volume><fpage>543</fpage><lpage>548</lpage><pub-id pub-id-type="doi">10.1126/science.1227670</pub-id></element-citation></ref><ref id="bib27"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Langmead</surname><given-names>B</given-names></name><name><surname>Salzberg</surname><given-names>SL</given-names></name></person-group><year>2012</year><article-title>Fast gapped-read alignment with Bowtie 2</article-title><source>Nature Methods</source><volume>9</volume><fpage>357</fpage><lpage>359</lpage><pub-id pub-id-type="doi">10.1038/nmeth.1923</pub-id></element-citation></ref><ref id="bib28"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Li</surname><given-names>G</given-names></name><name><surname>Bahn</surname><given-names>JH</given-names></name><name><surname>Lee</surname><given-names>JH</given-names></name><name><surname>Peng</surname><given-names>G</given-names></name><name><surname>Chen</surname><given-names>Z</given-names></name><name><surname>Nelson</surname><given-names>SF</given-names></name><name><surname>Xiao</surname><given-names>X</given-names></name></person-group><year>2012a</year><article-title>Identification of allele-specific alternative mRNA processing via transcriptome sequencing</article-title><source>Nucleic Acids Research</source><volume>40</volume><fpage>e104</fpage><pub-id pub-id-type="doi">10.1093/nar/gks280</pub-id></element-citation></ref><ref id="bib29"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>H</given-names></name><name><surname>Durbin</surname><given-names>R</given-names></name></person-group><year>2009</year><article-title>Fast and accurate short read alignment with Burrows-Wheeler transform</article-title><source>Bioinformatics</source><volume>25</volume><fpage>1754</fpage><lpage>1760</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btp324</pub-id></element-citation></ref><ref id="bib30"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Li</surname><given-names>H</given-names></name><name><surname>Handsaker</surname><given-names>B</given-names></name><name><surname>Wysoker</surname><given-names>A</given-names></name><name><surname>Fennell</surname><given-names>T</given-names></name><name><surname>Ruan</surname><given-names>J</given-names></name><name><surname>Homer</surname><given-names>N</given-names></name><name><surname>Marth</surname><given-names>G</given-names></name><name><surname>Abecasis</surname><given-names>G</given-names></name><name><surname>Durbin</surname><given-names>R</given-names></name></person-group><year>2009</year><article-title>The Sequence Alignment/Map format and SAMtools</article-title><source>Bioinformatics</source><volume>25</volume><fpage>2078</fpage><lpage>2079</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btp352</pub-id></element-citation></ref><ref id="bib31"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>SM</given-names></name><name><surname>Valo</surname><given-names>Z</given-names></name><name><surname>Wang</surname><given-names>J</given-names></name><name><surname>Gao</surname><given-names>H</given-names></name><name><surname>Bowers</surname><given-names>CW</given-names></name><name><surname>Singer-Sam</surname><given-names>J</given-names></name></person-group><year>2012b</year><article-title>Transcriptome-wide survey of mouse CNS-derived cells reveals monoallelic expression within novel gene families</article-title><source>PLOS ONE</source><volume>7</volume><fpage>e31751</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0031751</pub-id></element-citation></ref><ref id="bib32"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Limentani</surname><given-names>GB</given-names></name><name><surname>Ringo</surname><given-names>MC</given-names></name><name><surname>Ye</surname><given-names>F</given-names></name><name><surname>Berquist</surname><given-names>ML</given-names></name><name><surname>McSorley</surname><given-names>EO</given-names></name></person-group><year>2005</year><article-title>Beyond the t-test: statistical equivalence testing</article-title><source>Analytical Chemistry</source><volume>77</volume><fpage>221A</fpage><lpage>226A</lpage><pub-id pub-id-type="doi">10.1021/ac053390m</pub-id></element-citation></ref><ref id="bib33"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lundberg</surname><given-names>E</given-names></name><name><surname>Fagerberg</surname><given-names>L</given-names></name><name><surname>Klevebring</surname><given-names>D</given-names></name><name><surname>Matic</surname><given-names>I</given-names></name><name><surname>Geiger</surname><given-names>T</given-names></name><name><surname>Cox</surname><given-names>J</given-names></name><name><surname>Algenas</surname><given-names>C</given-names></name><name><surname>Lundeberg</surname><given-names>J</given-names></name><name><surname>Mann</surname><given-names>M</given-names></name><name><surname>Uhlen</surname><given-names>M</given-names></name></person-group><year>2010</year><article-title>Defining the transcriptome and proteome in three functionally different human cell lines</article-title><source>Molecular Systems Biology</source><volume>6</volume><fpage>450</fpage><pub-id pub-id-type="doi">10.1038/msb.2010.106</pub-id></element-citation></ref><ref id="bib34"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lyon</surname><given-names>MF</given-names></name></person-group><year>1961</year><article-title>Gene action in the X-chromosome of the mouse (<italic>Mus musculus</italic> 
L.)</article-title><source>Nature</source><volume>190</volume><fpage>372</fpage><lpage>373</lpage><pub-id pub-id-type="doi">10.1038/190372a0</pub-id></element-citation></ref><ref id="bib35"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Magklara</surname><given-names>A</given-names></name><name><surname>Yen</surname><given-names>A</given-names></name><name><surname>Colquitt</surname><given-names>BM</given-names></name><name><surname>Clowney</surname><given-names>EJ</given-names></name><name><surname>Allen</surname><given-names>W</given-names></name><name><surname>Markenscoff-Papadimitriou</surname><given-names>E</given-names></name><name><surname>Evans</surname><given-names>ZA</given-names></name><name><surname>Kheradpour</surname><given-names>P</given-names></name><name><surname>Mountoufaris</surname><given-names>G</given-names></name><name><surname>Carey</surname><given-names>C</given-names></name><name><surname>Barnea</surname><given-names>G</given-names></name><name><surname>Kellis</surname><given-names>M</given-names></name><name><surname>Lomvardas</surname><given-names>S</given-names></name></person-group><year>2011</year><article-title>An epigenetic signature for monoallelic olfactory receptor expression</article-title><source>Cell</source><volume>145</volume><fpage>555</fpage><lpage>570</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2011.03.040</pub-id></element-citation></ref><ref id="bib36"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Margueron</surname><given-names>R</given-names></name><name><surname>Reinberg</surname><given-names>D</given-names></name></person-group><year>2011</year><article-title>The Polycomb complex PRC2 and its mark in life</article-title><source>Nature</source><volume>469</volume><fpage>343</fpage><lpage>349</lpage><pub-id pub-id-type="doi">10.1038/nature09784</pub-id></element-citation></ref><ref 
id="bib37"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>McDaniell</surname><given-names>R</given-names></name><name><surname>Lee</surname><given-names>BK</given-names></name><name><surname>Song</surname><given-names>L</given-names></name><name><surname>Liu</surname><given-names>Z</given-names></name><name><surname>Boyle</surname><given-names>AP</given-names></name><name><surname>Erdos</surname><given-names>MR</given-names></name><name><surname>Scott</surname><given-names>LJ</given-names></name><name><surname>Morken</surname><given-names>MA</given-names></name><name><surname>Kucera</surname><given-names>KS</given-names></name><name><surname>Battenhouse</surname><given-names>A</given-names></name><name><surname>Keefe</surname><given-names>D</given-names></name><name><surname>Collins</surname><given-names>FS</given-names></name><name><surname>Willard</surname><given-names>HF</given-names></name><name><surname>Lieb</surname><given-names>JD</given-names></name><name><surname>Furey</surname><given-names>TS</given-names></name><name><surname>Crawford</surname><given-names>GE</given-names></name><name><surname>Iyer</surname><given-names>VR</given-names></name><name><surname>Birney</surname><given-names>E</given-names></name></person-group><year>2010</year><article-title>Heritable individual-specific and allele-specific chromatin signatures in humans</article-title><source>Science</source><volume>328</volume><fpage>235</fpage><lpage>239</lpage><pub-id pub-id-type="doi">10.1126/science.1184655</pub-id></element-citation></ref><ref id="bib38"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Meyer</surname><given-names>LR</given-names></name><name><surname>Zweig</surname><given-names>AS</given-names></name><name><surname>Hinrichs</surname><given-names>AS</given-names></name><name><surname>Karolchik</surname><given-names>D</given-names></name><name><surname>Kuhn</surname><given-names>RM</given-names></name><name><surname>Wong</surname><given-names>M</given-names></name><name><surname>Sloan</surname><given-names>CA</given-names></name><name><surname>Rosenbloom</surname><given-names>KR</given-names></name><name><surname>Roe</surname><given-names>G</given-names></name><name><surname>Rhead</surname><given-names>B</given-names></name><name><surname>Raney</surname><given-names>BJ</given-names></name><name><surname>Pohl</surname><given-names>A</given-names></name><name><surname>Malladi</surname><given-names>VS</given-names></name><name><surname>Li</surname><given-names>CH</given-names></name><name><surname>Lee</surname><given-names>BT</given-names></name><name><surname>Learned</surname><given-names>K</given-names></name><name><surname>Kirkup</surname><given-names>V</given-names></name><name><surname>Hsu</surname><given-names>F</given-names></name><name><surname>Heitner</surname><given-names>S</given-names></name><name><surname>Harte</surname><given-names>RA</given-names></name><name><surname>Haeussler</surname><given-names>M</given-names></name><name><surname>Guruvadoo</surname><given-names>L</given-names></name><name><surname>Goldman</surname><given-names>M</given-names></name><name><surname>Giardine</surname><given-names>BM</given-names></name><name><surname>Fujita</surname><given-names>PA</given-names></name><name><surname>Dreszer</surname><given-names>TR</given-names></name><name><surname>Diekhans</surname><given-names>M</given-names></name><name><surname>Cline</surname><given-names>MS</given-names></name><name><surname>Clawson</surname><given-names>H</given-names></name><name><surname>Barber</surname><given-names>G
P</given-names></name><name><surname>Haussler</surname><given-names>D</given-names></name><name><surname>Kent</surname><given-names>WJ</given-names></name></person-group><year>2013</year><article-title>The UCSC Genome Browser database: extensions and updates 2013</article-title><source>Nucleic Acids Research</source><volume>41</volume><fpage>D64</fpage><lpage>D69</lpage><pub-id pub-id-type="doi">10.1093/nar/gks1048</pub-id></element-citation></ref><ref id="bib39"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mikkelsen</surname><given-names>TS</given-names></name><name><surname>Ku</surname><given-names>M</given-names></name><name><surname>Jaffe</surname><given-names>DB</given-names></name><name><surname>Issac</surname><given-names>B</given-names></name><name><surname>Lieberman</surname><given-names>E</given-names></name><name><surname>Giannoukos</surname><given-names>G</given-names></name><name><surname>Alvarez</surname><given-names>P</given-names></name><name><surname>Brockman</surname><given-names>W</given-names></name><name><surname>Kim</surname><given-names>TK</given-names></name><name><surname>Koche</surname><given-names>RP</given-names></name><name><surname>Lee</surname><given-names>W</given-names></name><name><surname>Mendenhall</surname><given-names>E</given-names></name><name><surname>O’Donovan</surname><given-names>A</given-names></name><name><surname>Presser</surname><given-names>A</given-names></name><name><surname>Russ</surname><given-names>C</given-names></name><name><surname>Xie</surname><given-names>X</given-names></name><name><surname>Meissner</surname><given-names>A</given-names></name><name><surname>Wernig</surname><given-names>M</given-names></name><name><surname>Jaenisch</surname><given-names>R</given-names></name><name><surname>Nusbaum</surname><given-names>C</given-names></name><name><surname>Lander</surname><given-names>ES</given-names></name><name><surname>Bernstein</surname><given-names>
BE</given-names></name></person-group><year>2007</year><article-title>Genome-wide maps of chromatin state in pluripotent and lineage-committed cells</article-title><source>Nature</source><volume>448</volume><fpage>553</fpage><lpage>560</lpage><pub-id pub-id-type="doi">10.1038/nature06008</pub-id></element-citation></ref><ref id="bib40"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mortazavi</surname><given-names>A</given-names></name><name><surname>Williams</surname><given-names>BA</given-names></name><name><surname>McCue</surname><given-names>K</given-names></name><name><surname>Schaeffer</surname><given-names>L</given-names></name><name><surname>Wold</surname><given-names>B</given-names></name></person-group><year>2008</year><article-title>Mapping and quantifying mammalian transcriptomes by RNA-Seq</article-title><source>Nature Methods</source><volume>5</volume><fpage>621</fpage><lpage>628</lpage><pub-id pub-id-type="doi">10.1038/nmeth.1226</pub-id></element-citation></ref><ref id="bib41"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nag</surname><given-names>A</given-names></name><name><surname>Savova</surname><given-names>V</given-names></name><name><surname>Fung</surname><given-names>H</given-names></name><name><surname>Miron</surname><given-names>A</given-names></name><name><surname>Yuan</surname><given-names>G</given-names></name><name><surname>Zhang</surname><given-names>K</given-names></name><name><surname>Gimelbrant</surname><given-names>A</given-names></name></person-group><year>2013</year><article-title>Data from: chromatin signature of widespread monoallelic expression</article-title><source>Dryad Digital Repository</source><pub-id pub-id-type="doi">10.5061/dryad.1775k</pub-id></element-citation></ref><ref id="bib42"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Parkhomchuk</surname><given-names>D</given-names></name><name><surname>Borodina</surname><given-names>T</given-names></name><name><surname>Amstislavskiy</surname><given-names>V</given-names></name><name><surname>Banaru</surname><given-names>M</given-names></name><name><surname>Hallen</surname><given-names>L</given-names></name><name><surname>Krobitsch</surname><given-names>S</given-names></name><name><surname>Lehrach</surname><given-names>H</given-names></name><name><surname>Soldatov</surname><given-names>A</given-names></name></person-group><year>2009</year><article-title>Transcriptome analysis by strand-specific sequencing of complementary DNA</article-title><source>Nucleic Acids Research</source><volume>37</volume><fpage>e123</fpage><pub-id pub-id-type="doi">10.1093/nar/gkp596</pub-id></element-citation></ref><ref id="bib43"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pereira</surname><given-names>JP</given-names></name><name><surname>Girard</surname><given-names>R</given-names></name><name><surname>Chaby</surname><given-names>R</given-names></name><name><surname>Cumano</surname><given-names>A</given-names></name><name><surname>Vieira</surname><given-names>P</given-names></name></person-group><year>2003</year><article-title>Monoallelic expression of the murine gene encoding Toll-like receptor 4</article-title><source>Nature Immunology</source><volume>4</volume><fpage>464</fpage><lpage>470</lpage><pub-id pub-id-type="doi">10.1038/ni917</pub-id></element-citation></ref><ref id="bib44"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Pernis</surname><given-names>B</given-names></name><name><surname>Chiappino</surname><given-names>G</given-names></name><name><surname>Kelus</surname><given-names>AS</given-names></name><name><surname>Gell</surname><given-names>PG</given-names></name></person-group><year>1965</year><article-title>Cellular localization of immunoglobulins with different allotypic specificities in rabbit lymphoid tissues</article-title><source>The Journal of Experimental Medicine</source><volume>122</volume><fpage>853</fpage><lpage>876</lpage><pub-id pub-id-type="doi">10.1084/jem.122.5.853</pub-id></element-citation></ref><ref id="bib45"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Raval</surname><given-names>A</given-names></name><name><surname>Tanner</surname><given-names>SM</given-names></name><name><surname>Byrd</surname><given-names>JC</given-names></name><name><surname>Angerman</surname><given-names>EB</given-names></name><name><surname>Perko</surname><given-names>JD</given-names></name><name><surname>Chen</surname><given-names>SS</given-names></name><name><surname>Hackanson</surname><given-names>B</given-names></name><name><surname>Grever</surname><given-names>MR</given-names></name><name><surname>Lucas</surname><given-names>DM</given-names></name><name><surname>Matkovic</surname><given-names>JJ</given-names></name><name><surname>Lin</surname><given-names>TS</given-names></name><name><surname>Kipps</surname><given-names>TJ</given-names></name><name><surname>Murray</surname><given-names>F</given-names></name><name><surname>Weisenburger</surname><given-names>D</given-names></name><name><surname>Sanger</surname><given-names>W</given-names></name><name><surname>Lynch</surname><given-names>J</given-names></name><name><surname>Watson</surname><given-names>P</given-names></name><name><surname>Jansen</surname><given-names>M</given-names></name><name><surname>Yoshinaga</surname><given-names>
Y</given-names></name><name><surname>Rosenquist</surname><given-names>R</given-names></name><name><surname>de Jong</surname><given-names>PJ</given-names></name><name><surname>Coggill</surname><given-names>P</given-names></name><name><surname>Beck</surname><given-names>S</given-names></name><name><surname>Lynch</surname><given-names>H</given-names></name><name><surname>de la Chapelle</surname><given-names>A</given-names></name><name><surname>Plass</surname><given-names>C</given-names></name></person-group><year>2007</year><article-title>Downregulation of death-associated protein kinase 1 (DAPK1) in chronic lymphocytic leukemia</article-title><source>Cell</source><volume>129</volume><fpage>879</fpage><lpage>890</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2007.03.043</pub-id></element-citation></ref><ref id="bib46"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rozowsky</surname><given-names>J</given-names></name><name><surname>Abyzov</surname><given-names>A</given-names></name><name><surname>Wang</surname><given-names>J</given-names></name><name><surname>Alves</surname><given-names>P</given-names></name><name><surname>Raha</surname><given-names>D</given-names></name><name><surname>Harmanci</surname><given-names>A</given-names></name><name><surname>Leng</surname><given-names>J</given-names></name><name><surname>Bjornson</surname><given-names>R</given-names></name><name><surname>Kong</surname><given-names>Y</given-names></name><name><surname>Kitabayashi</surname><given-names>N</given-names></name><name><surname>Bhardwaj</surname><given-names>N</given-names></name><name><surname>Rubin</surname><given-names>M</given-names></name><name><surname>Snyder</surname><given-names>M</given-names></name><name><surname>Gerstein</surname><given-names>M</given-names></name></person-group><year>2011</year><article-title>AlleleSeq: analysis of allele-specific expression and binding in a network 
framework</article-title><source>Molecular Systems Biology</source><volume>7</volume><fpage>522</fpage><pub-id pub-id-type="doi">10.1038/msb.2011.54</pub-id></element-citation></ref><ref id="bib10"><element-citation publication-type="journal"><collab>The 1000 Genomes Project Consortium</collab><year>2010</year><article-title>A map of human genome variation from population-scale sequencing</article-title><source>Nature</source><volume>467</volume><fpage>1061</fpage><lpage>1073</lpage><pub-id pub-id-type="doi">10.1038/nature09534</pub-id></element-citation></ref><ref id="bib47"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Uhlen</surname><given-names>M</given-names></name><name><surname>Oksvold</surname><given-names>P</given-names></name><name><surname>Fagerberg</surname><given-names>L</given-names></name><name><surname>Lundberg</surname><given-names>E</given-names></name><name><surname>Jonasson</surname><given-names>K</given-names></name><name><surname>Forsberg</surname><given-names>M</given-names></name><name><surname>Zwahlen</surname><given-names>M</given-names></name><name><surname>Kampf</surname><given-names>C</given-names></name><name><surname>Wester</surname><given-names>K</given-names></name><name><surname>Hober</surname><given-names>S</given-names></name><name><surname>Wernerus</surname><given-names>H</given-names></name><name><surname>Björling</surname><given-names>L</given-names></name><name><surname>Ponten</surname><given-names>F</given-names></name></person-group><year>2010</year><article-title>Towards a knowledge-based Human Protein Atlas</article-title><source>Nature Biotechnology</source><volume>28</volume><fpage>1248</fpage><lpage>1250</lpage><pub-id pub-id-type="doi">10.1038/nbt1210-1248</pub-id></element-citation></ref><ref id="bib48"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Voigt</surname><given-names>P</given-names></name><name><surname>LeRoy</surname><given-names>G</given-names></name><name><surname>Drury</surname><given-names>WJ</given-names><suffix>III</suffix></name><name><surname>Zee</surname><given-names>BM</given-names></name><name><surname>Son</surname><given-names>J</given-names></name><name><surname>Beck</surname><given-names>DB</given-names></name><name><surname>Young</surname><given-names>NL</given-names></name><name><surname>Garcia</surname><given-names>BA</given-names></name><name><surname>Reinberg</surname><given-names>D</given-names></name></person-group><year>2012</year><article-title>Asymmetrically modified nucleosomes</article-title><source>Cell</source><volume>151</volume><fpage>181</fpage><lpage>193</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2012.09.002</pub-id></element-citation></ref><ref id="bib49"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wardemann</surname><given-names>H</given-names></name><name><surname>Yurasov</surname><given-names>S</given-names></name><name><surname>Schaefer</surname><given-names>A</given-names></name><name><surname>Young</surname><given-names>JW</given-names></name><name><surname>Meffre</surname><given-names>E</given-names></name><name><surname>Nussenzweig</surname><given-names>MC</given-names></name></person-group><year>2003</year><article-title>Predominant autoantibody production by early human B cell precursors</article-title><source>Science</source><volume>301</volume><fpage>1374</fpage><lpage>1377</lpage><pub-id pub-id-type="doi">10.1126/science.1086907</pub-id></element-citation></ref><ref id="bib50"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Wen</surname><given-names>B</given-names></name><name><surname>Wu</surname><given-names>H</given-names></name><name><surname>Bjornsson</surname><given-names>H</given-names></name><name><surname>Green</surname><given-names>RD</given-names></name><name><surname>Irizarry</surname><given-names>R</given-names></name><name><surname>Feinberg</surname><given-names>AP</given-names></name></person-group><year>2008</year><article-title>Overlapping euchromatin/heterochromatin-associated marks are enriched in imprinted gene regions and predict allele-specific modification</article-title><source>Genome Research</source><volume>18</volume><fpage>1806</fpage><lpage>1813</lpage><pub-id pub-id-type="doi">10.1101/gr.067587.108</pub-id></element-citation></ref><ref id="bib51"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname><given-names>F</given-names></name><name><surname>Babak</surname><given-names>T</given-names></name><name><surname>Shendure</surname><given-names>J</given-names></name><name><surname>Disteche</surname><given-names>CM</given-names></name></person-group><year>2010</year><article-title>Global survey of escape from X inactivation by RNA-sequencing in mouse</article-title><source>Genome Research</source><volume>20</volume><fpage>614</fpage><lpage>622</lpage><pub-id pub-id-type="doi">10.1101/gr.103200.109</pub-id></element-citation></ref><ref id="bib52"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Zhang</surname><given-names>K</given-names></name><name><surname>Li</surname><given-names>JB</given-names></name><name><surname>Gao</surname><given-names>Y</given-names></name><name><surname>Egli</surname><given-names>D</given-names></name><name><surname>Xie</surname><given-names>B</given-names></name><name><surname>Deng</surname><given-names>J</given-names></name><name><surname>Li</surname><given-names>Z</given-names></name><name><surname>Lee</surname><given-names>JH</given-names></name><name><surname>Aach</surname><given-names>J</given-names></name><name><surname>Leproust</surname><given-names>EM</given-names></name><name><surname>Eggan</surname><given-names>K</given-names></name><name><surname>Church</surname><given-names>GM</given-names></name></person-group><year>2009</year><article-title>Digital RNA allelotyping reveals tissue-specific and allele-specific gene expression in human</article-title><source>Nature Methods</source><volume>6</volume><fpage>613</fpage><lpage>618</lpage><pub-id pub-id-type="doi">10.1038/nmeth.1357</pub-id></element-citation></ref><ref id="bib53"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Zuo</surname><given-names>T</given-names></name><name><surname>Wang</surname><given-names>L</given-names></name><name><surname>Morrison</surname><given-names>C</given-names></name><name><surname>Chang</surname><given-names>X</given-names></name><name><surname>Zhang</surname><given-names>H</given-names></name><name><surname>Li</surname><given-names>W</given-names></name><name><surname>Liu</surname><given-names>Y</given-names></name><name><surname>Wang</surname><given-names>Y</given-names></name><name><surname>Liu</surname><given-names>X</given-names></name><name><surname>Chan</surname><given-names>MW</given-names></name><name><surname>Liu</surname><given-names>JQ</given-names></name><name><surname>Love</surname><given-names>R</given-names></name><name><surname>Liu</surname><given-names>CG</given-names></name><name><surname>Godfrey</surname><given-names>V</given-names></name><name><surname>Shen</surname><given-names>R</given-names></name><name><surname>Huang</surname><given-names>TH</given-names></name><name><surname>Yang</surname><given-names>T</given-names></name><name><surname>Park</surname><given-names>BK</given-names></name><name><surname>Wang</surname><given-names>CY</given-names></name><name><surname>Zheng</surname><given-names>P</given-names></name><name><surname>Liu</surname><given-names>Y</given-names></name></person-group><year>2007</year><article-title>FOXP3 is an X-linked breast cancer suppressor gene and an important repressor of the HER-2/ErbB2 oncogene</article-title><source>Cell</source><volume>129</volume><fpage>1275</fpage><lpage>1286</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2007.04.034</pub-id></element-citation></ref><ref id="bib54"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Zwemer</surname><given-names>LM</given-names></name><name><surname>Zak</surname><given-names>A</given-names></name><name><surname>Thompson</surname><given-names>BR</given-names></name><name><surname>Kirby</surname><given-names>A</given-names></name><name><surname>Daly</surname><given-names>MJ</given-names></name><name><surname>Chess</surname><given-names>A</given-names></name><name><surname>Gimelbrant</surname><given-names>AA</given-names></name></person-group><year>2012</year><article-title>Autosomal monoallelic expression in the mouse</article-title><source>Genome Biology</source><volume>13</volume><fpage>R10</fpage><pub-id pub-id-type="doi">10.1186/gb-2012-13-2-r10</pub-id></element-citation></ref></ref-list></back><sub-article article-type="article-commentary" id="SA1"><front-stub><article-id pub-id-type="doi">10.7554/eLife.01256.014</article-id><title-group><article-title>Decision letter</article-title></title-group><contrib-group content-type="section"><contrib contrib-type="editor"><name><surname>Gingeras</surname><given-names>Thomas</given-names></name><role>Reviewing editor</role><aff><institution>Cold Spring Harbor Laboratory</institution>, <country>United States</country></aff></contrib></contrib-group></front-stub><body><boxed-text><p>eLife posts the editorial decision letter and author response on a selection of the published articles (subject to the approval of the authors). An edited version of the letter sent to the authors after peer review is shown, indicating the substantive concerns or comments; minor concerns are not usually shown. Reviewers have the opportunity to discuss the decision before the letter is sent (see <ext-link ext-link-type="uri" xlink:href="http://elife.elifesciences.org/review-process">review process</ext-link>). 
Similarly, the author response typically shows only responses to the major concerns raised by the reviewers.</p></boxed-text><p>Thank you for sending your work entitled “Chromatin signature of widespread monoallelic expression” for consideration at <italic>eLife</italic>. Your article has been favorably evaluated by a Senior editor and 3 reviewers, one of whom is a member of our Board of Reviewing Editors.</p><p>The Reviewing editor and the other reviewers discussed their comments and the Reviewing editor has assembled the following list of areas of clarification and editorial comments that should be addressed.</p><p>1) The authors mentioned that they have explored different features except H3K27me3 and H3K36me3 to classified MAE and BAE genes, but do not describe them. However, it is important to see how other features affect the performance of classification.</p><p>2) The rationale of selecting several thresholds used in the analyses is not discussed nor is the variability of the results described if other higher or lower thresholds are used. This question can be applied to the 2:1 ratio used for RNAseq data to distinguish MAE from BAE genes and the thresholds corresponding to the log H3K36me3/input vs log H3K27me3/input inside of <xref ref-type="fig" rid="fig1">Figure 1B and 1C</xref>.</p><p>3) The predictability of the signature studied should be addressed by either providing information concerning results obtained from other cell types or make more explicit that since this approach was carried out with only a very limited biological context the general applicability is currently uncertain.</p><p>4) The authors describe the chromatin signature of gene bodies. 
It is not clear why they focused on gene bodies with no mention of promoter versus gene body status, or of other possible regulatory elements.</p><p>5) Why did the authors not choose similar numbers of BAE genes (1068) as MAE (270) to train the model?</p><p>6) Why are only 27% of X-linked genes showed positive evidence of clone-specific allelic bias? Surely this should be higher if this number is of the total expressed genes? How many of the X-linked genes looked at are known escapees?</p><p>7) For <xref ref-type="fig" rid="fig1">Figure 1B</xref> and <xref ref-type="fig" rid="fig4">Figure 4</xref> what percentage of the previously identified monoallelically expressed genes have both chromatin signatures and the number of the genes assayed needs to be included in <xref ref-type="fig" rid="fig4">Figure 4</xref>.</p><p>8) Although the authors attempt to deal with this in the Discussion, it is difficult to understand molecularly how bivalent genes (which are stated to be overrepresented in the predicted monoallelically expressed genes) resolve into a monoallelically expressed gene predicted chromatin signature.</p><p>9) The subsection title “Human MAE subgenome” is obscure and should be clarified.</p></body></sub-article><sub-article article-type="reply" id="SA2"><front-stub><article-id pub-id-type="doi">10.7554/eLife.01256.015</article-id><title-group><article-title>Author response</article-title></title-group></front-stub><body><p><italic>1) The authors mentioned that they have explored different features except H3K27me3 and H3K36me3 to classified MAE and BAE genes, but do not describe them. However, it is important to see how other features affect the performance of classification</italic>.</p><p>We have now added <xref ref-type="fig" rid="fig1s3">Figure 1–figure supplement 3</xref> with multiple panels showing performance of other gene body features, as well as promoter features, with the training set genes. 
The main text has been adjusted accordingly with three new paragraphs starting:</p><p>“We focused on the eight marks that were investigated in a broad variety of cell types: H3K27me3 (histone H3 Lys-27 trimethylation), H3K36me3, H3K4me2, H4K20me3, H3K27ac (histone H3 Lys-27 acetylation), H3K4me1, H3K4me3, H3K9ac (<xref ref-type="fig" rid="fig1">Figure 1A</xref>)…”</p><p>This also addresses one part of point 4.</p><p><italic>2) The rationale of selecting several thresholds used in the analyses is not discussed nor is the variability of the results described if other higher or lower thresholds are used. This question can be applied to the 2:1 ratio used for RNAseq data to distinguish MAE from BAE genes and the thresholds corresponding to the log H3K36me3/input vs log H3K27me3/input inside of</italic> <xref ref-type="fig" rid="fig1"><italic>Figure 1B and 1C</italic></xref>.</p><p>We would like to clarify that the minimum 2:1 ratio of allele-specific counts in RNA-Seq data was used as one necessary but not sufficient criterion of allelic bias (<xref ref-type="fig" rid="fig2">Figure 2C</xref> summarizes additional statistical tests we required). Moreover, presence of allelic expression bias, no matter how strong, was in and of itself insufficient to call a gene MAE. 
In order to distinguish MAE-based bias from <italic>cis</italic>-regulatory or imprinting-based bias, we also required positive identification of equal or preferential expression of the other allele in other clonal cell populations (see <xref ref-type="fig" rid="fig3">Figure 3B</xref> for examples).</p><p>We have edited the text in the Materials and methods and Results sections to reflect this.</p><p><italic>3) The predictability of the signature studied should be addressed by either providing information concerning results obtained from other cell types or make more explicit that since this approach was carried out with only a very limited biological context the general applicability is currently uncertain</italic>.</p><p>We clarified the text as follows:</p><p>“It should be noted that the correspondence of the chromatin signature to monoallelic expression has only been systematically validated in clonal LCL lines (see <xref ref-type="fig" rid="fig2">Figure 2</xref> and <xref ref-type="fig" rid="fig3">Figure 3</xref>). Analysis in nonclonal cell populations (such as the ENCODE cell lines or isolated <italic>ex vivo</italic> cells) has been mostly restricted to single-cell techniques such as the fluorescent in situ hybridization approach which we have used in PBMCs (<xref ref-type="bibr" rid="bib17">Gimelbrant et al., 2007</xref>). Until a more general study is completed, it remains formally possible that the chromatin signature does not reflect MAE in some or many cell types. Thus the analysis in the rest of this section should be understood as being performed on genes that have a particular chromatin signature, which at least in one cell type strongly correlates with monoallelic expression.”</p><p><italic>4) The authors describe the chromatin signature of gene bodies. 
It is not clear why they focused on gene bodies with no mention of promoter versus gene body status, or of other possible regulatory elements</italic>.</p><p>We discuss analysis of promoter features in response to point 1. We also added the following to the Discussion:</p><p>“Interestingly, the promoter signal carried very little additional information relative to the gene-body features (see <xref ref-type="fig" rid="fig1s3">Figure 1–figure supplement 3</xref>). This could be due to some combination of biological reasons and higher signal/noise ratio associated with gene body features due to their much greater length. It should also be noted that we assessed promoters and gene bodies because they are unequivocally associated with particular genes. In the future, it might be informative to study the role of other regulatory elements, such as enhancers.”</p><p><italic>5) Why did the authors not choose similar numbers of BAE genes (1068) as MAE (270) to train the model</italic>?</p><p>To clarify this point, we added the following into the “Classifier training, evaluation and prediction” subsection of the Materials and methods:</p><p>“We used all genes for which the MAE or biallelic status was ascertained with high confidence to gain maximum information from all available knowledge, including negative examples.”</p><p><italic>6) Why are only 27% of X-linked genes showed positive evidence of clone-specific allelic bias? Surely this should be higher if this number is of the total expressed genes? How many of the X-linked genes looked at are known escapees</italic>?</p><p>We showed that fraction with respect to all X-linked genes with any coverage, rather than those that had sufficient coverage on the SNPs. That was confusing and we changed the description to include only genes that were sufficiently covered. 
We adjusted the description accordingly.</p><p><italic>7) For</italic> <xref ref-type="fig" rid="fig1"><italic>Figure 1B</italic></xref> <italic>and</italic> <xref ref-type="fig" rid="fig4"><italic>Figure 4</italic></xref> <italic>what percentage of the previously identified monoallelically expressed genes have both chromatin signatures and the number of the genes assayed needs to be included in</italic> <xref ref-type="fig" rid="fig4"><italic>Figure 4</italic></xref>.</p><p>We added the following text to the caption of <xref ref-type="fig" rid="fig1">Figure 1B</xref>: “Of 270 high-confidence MAE genes, 268 had data for both H3K27me3 and H3K36me3. Of these, 204 (76%) are within predicted MAE region.”</p><p>And we have added the following to the caption of <xref ref-type="fig" rid="fig4">Figure 4</xref>: “Analysis summarized in this figure is based on 482 SNPs within 458 genes.”</p><p><italic>8) Although the authors attempt to deal with this in the Discussion, it is difficult to understand molecularly how bivalent genes (which are stated to be overrepresented in the predicted monoallelically expressed genes) resolve into a monoallelically expressed gene predicted chromatin signature</italic>.</p><p>We rewrote the corresponding paragraph to reflect this.</p><p><italic>9) The subsection title “Human MAE subgenome” is obscure and should be clarified</italic>.</p><p>We changed the section subtitle to read: “General properties of human MAE genes”</p></body></sub-article></article>