|
| 1 | +// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 | +// for details. All rights reserved. Use of this source code is governed by a |
| 3 | +// BSD-style license that can be found in the LICENSE file. |
| 4 | + |
| 5 | +/** |
| 6 | + * Code for converting HTML into text, for use during code generation of |
| 7 | + * analyzer and analysis server. |
| 8 | + */ |
| 9 | +library analyzer.src.codegen.text_formatter; |
| 10 | + |
| 11 | +import 'package:html/dom.dart' as dom; |
| 12 | + |
| 13 | +import 'tools.dart'; |
| 14 | + |
/**
 * Pattern matching a single whitespace character.
 */
final RegExp whitespace = new RegExp(r'\s');
| 16 | + |
/**
 * Render the HTML nodes in [desc] as text, word wrapping at width [width].
 *
 * If [javadocStyle] is true, then the output is compatible with Javadoc,
 * which understands certain HTML constructs.
 *
 * [removeTrailingNewLine] is forwarded unchanged to the formatter's
 * `collectCode` call (presumably it strips the final newline from the
 * collected text; confirm against `CodeGenerator.collectCode`).
 */
String nodesToText(List<dom.Node> desc, int width, bool javadocStyle,
    {bool removeTrailingNewLine: false}) {
  final _TextFormatter engine = new _TextFormatter(width, javadocStyle);

  // Feed every node through the formatter, then flush whatever word/line is
  // still pending without requesting a trailing blank line.
  void emitAll() {
    engine
      ..addAll(desc)
      ..lineBreak(false);
  }

  return engine.collectCode(emitAll,
      removeTrailingNewLine: removeTrailingNewLine);
}
| 31 | + |
/**
 * Engine that transforms HTML to text.  The input HTML is processed one
 * character at a time, gathering characters into words and words into lines.
 *
 * Output is produced through `write`/`writeln`, and indentation through
 * `indent`/`indentSpecial`/`indentWidth`; none of these are defined in this
 * class (presumably they are inherited from [CodeGenerator]).
 */
class _TextFormatter extends CodeGenerator {
  /**
   * Word-wrapping width.
   */
  final int width;

  /**
   * The word currently being gathered.
   */
  String word = '';

  /**
   * The line currently being gathered.
   */
  String line = '';

  /**
   * True if a blank line should be inserted before the next word.
   */
  bool verticalSpaceNeeded = false;

  /**
   * True if no text has been output yet.  This suppresses blank lines.
   */
  bool atStart = true;

  /**
   * True if we are processing a <pre> element, thus whitespace should be
   * preserved.
   */
  bool preserveSpaces = false;

  /**
   * True if the output should be Javadoc compatible.
   */
  final bool javadocStyle;

  _TextFormatter(this.width, this.javadocStyle);

  /**
   * Process an HTML node.
   *
   * Text nodes are consumed one character at a time.  Element nodes are
   * dispatched on their local name; an element that is not explicitly
   * handled, or a node that is neither text nor an element, causes an
   * [Exception] to be thrown.
   */
  void add(dom.Node node) {
    if (node is dom.Text) {
      for (String char in node.text.split('')) {
        if (preserveSpaces) {
          // Inside <pre>: move any pending word into the line, then emit the
          // character verbatim (escaped if needed) so whitespace survives.
          wordBreak();
          write(escape(char));
        } else if (whitespace.hasMatch(char)) {
          // Whitespace outside <pre> only terminates the current word.
          wordBreak();
        } else {
          // First character after a requested gap triggers the blank line.
          resolveVerticalSpace();
          word += escape(char);
        }
      }
    } else if (node is dom.Element) {
      switch (node.localName) {
        case 'br':
          lineBreak(false);
          break;
        case 'dl':
        case 'dt':
        case 'h1':
        case 'h2':
        case 'h3':
        case 'h4':
        case 'p':
          // Elements separated from their surroundings by blank lines.
          lineBreak(true);
          addAll(node.nodes);
          lineBreak(true);
          break;
        case 'div':
          lineBreak(false);
          if (node.classes.contains('hangingIndent')) {
            resolveVerticalSpace();
            indentSpecial('', ' ', () {
              addAll(node.nodes);
              lineBreak(false);
            });
          } else {
            addAll(node.nodes);
            lineBreak(false);
          }
          break;
        case 'ul':
          lineBreak(false);
          addAll(node.nodes);
          lineBreak(false);
          break;
        case 'li':
          lineBreak(false);
          resolveVerticalSpace();
          // NOTE(review): the bullet prefix is two characters wide but the
          // second argument is a single space; confirm the intended
          // alignment against indentSpecial's contract.
          indentSpecial('- ', ' ', () {
            addAll(node.nodes);
            lineBreak(false);
          });
          break;
        case 'dd':
          lineBreak(true);
          indent(() {
            addAll(node.nodes);
            lineBreak(true);
          });
          break;
        case 'pre':
          lineBreak(false);
          resolveVerticalSpace();
          if (javadocStyle) {
            writeln('<pre>');
          }
          // Restore the previous mode even if a nested node throws.
          bool oldPreserveSpaces = preserveSpaces;
          try {
            preserveSpaces = true;
            addAll(node.nodes);
          } finally {
            preserveSpaces = oldPreserveSpaces;
          }
          writeln();
          if (javadocStyle) {
            writeln('</pre>');
          }
          lineBreak(false);
          break;
        case 'a':
        case 'b':
        case 'body':
        case 'html':
        case 'i':
        case 'span':
        case 'tt':
          // Inline/wrapper elements: only their children contribute text.
          addAll(node.nodes);
          break;
        case 'head':
          // Contents of <head> are ignored entirely.
          break;
        default:
          throw new Exception('Unexpected HTML element: ${node.localName}');
      }
    } else {
      throw new Exception('Unexpected HTML: $node');
    }
  }

  /**
   * Process a list of HTML nodes, in order.
   */
  void addAll(List<dom.Node> nodes) {
    for (dom.Node node in nodes) {
      add(node);
    }
  }

  /**
   * Escape the given character for HTML.
   *
   * When [javadocStyle] is true, the characters `<`, `>` and `&` are replaced
   * by the corresponding HTML character references so that they are not
   * interpreted as markup.  Any other character, and every character when
   * [javadocStyle] is false, is returned unchanged.
   */
  String escape(String char) {
    if (javadocStyle) {
      switch (char) {
        case '<':
          return '&lt;';
        case '>':
          return '&gt;';
        case '&':
          return '&amp;';
      }
    }
    return char;
  }

  /**
   * Terminate the current word and/or line, if either is in progress.
   *
   * If [gap] is true and some text has already been output, a blank line is
   * requested before the next word.
   */
  void lineBreak(bool gap) {
    wordBreak();
    if (line.isNotEmpty) {
      writeln(line);
      line = '';
    }
    if (gap && !atStart) {
      verticalSpaceNeeded = true;
    }
  }

  /**
   * Insert vertical space if necessary.
   */
  void resolveVerticalSpace() {
    if (verticalSpaceNeeded) {
      writeln();
      verticalSpaceNeeded = false;
    }
  }

  /**
   * Terminate the current word, if a word is in progress.
   *
   * The word is appended to the current line when it fits within [width]
   * (taking the current indent width into account); otherwise the current
   * line is written out and the word starts a new line.
   */
  void wordBreak() {
    if (word.isNotEmpty) {
      atStart = false;
      if (line.isNotEmpty) {
        // The "+ 1" accounts for the space that joins the word to the line.
        if (indentWidth + line.length + 1 + word.length <= width) {
          line += ' $word';
        } else {
          writeln(line);
          line = word;
        }
      } else {
        line = word;
      }
      word = '';
    }
  }
}
0 commit comments