Skip to content

Commit 429f61a

Browse files
committed
Add a DeleteLeaf operation to Ssurgeon. Will delete a leaf (node with no children) and rearrange all the indices appropriately.
1 parent 203eb06 commit 429f61a

File tree

3 files changed

+108
-0
lines changed

3 files changed

+108
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package edu.stanford.nlp.semgraph.semgrex.ssurgeon;
2+
3+
import java.io.StringWriter;
4+
5+
import edu.stanford.nlp.ling.IndexedWord;
6+
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
7+
import edu.stanford.nlp.semgraph.SemanticGraph;
8+
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
9+
10+
/**
11+
* This action removes all incoming edges for the given node.
12+
* @author lumberjack
13+
*
14+
*/
15+
public class DeleteLeaf extends SsurgeonEdit {
16+
public static final String LABEL = "deleteLeaf";
17+
protected String nodeName; // name of this node
18+
19+
public DeleteLeaf(String nodeName) {
20+
this.nodeName = nodeName;
21+
}
22+
23+
/**
24+
* If executed twice on the same node, the second time there
25+
* will be no further updates
26+
*/
27+
@Override
28+
public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
29+
IndexedWord tgtNode = getNamedNode(nodeName, sm);
30+
if (tgtNode == null) {
31+
return false;
32+
}
33+
for (SemanticGraphEdge edge : sg.outgoingEdgeList(tgtNode)) {
34+
// if there are any outgoing edges, we aren't a leaf
35+
return false;
36+
}
37+
boolean deletedEdge = false;
38+
// use incomingEdgeList so that deleting an edge
39+
// doesn't affect the iteration
40+
for (SemanticGraphEdge edge : sg.incomingEdgeList(tgtNode)) {
41+
deletedEdge = deletedEdge || sg.removeEdge(edge);
42+
}
43+
int deletedIndex = tgtNode.index();
44+
boolean deletedNode = sg.removeVertex(tgtNode);
45+
// TODO: renumber
46+
if (deletedNode) {
47+
AddDep.moveNodes(sg, sm, x -> (x >= deletedIndex), x -> x-1, false);
48+
}
49+
return deletedEdge || deletedNode;
50+
}
51+
52+
@Override
53+
public String toEditString() {
54+
StringWriter buf = new StringWriter();
55+
buf.write(LABEL); buf.write("\t");
56+
buf.write(Ssurgeon.NODENAME_ARG); buf.write("\t"); buf.write(nodeName);
57+
return buf.toString();
58+
}
59+
60+
}

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java

+10
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
* <li> {@code mergeNodes n1 n2}
8888
* <li> {@code killAllIncomingEdges -node node}
8989
* <li> {@code delete -node node}
90+
* <li> {@code deleteLeaf -node node}
9091
* <li> {@code killNonRootedNodes}
9192
* </ul>
9293
*
@@ -154,6 +155,10 @@
154155
* {@code -node} is the node to delete.
155156
* You will only want to do this after separating the node from the parts of the graph you want to keep.
156157
*</p><p>
158+
* {@code deleteLeaf} deletes a node as long as it is a leaf.
159+
* {@code -node} is the node to delete.
160+
* If the node is not a leaf (that is, it has any outgoing edges), it will not be deleted.
161+
*</p><p>
157162
* {@code killNonRootedNodes} searches the graph and deletes all nodes which have no path to a root.
158163
*</p>
159164
*<p>
@@ -544,6 +549,11 @@ public static SsurgeonEdit parseEditLine(String editLine, Map<String, String> at
544549
throw new SsurgeonParseException("Cannot make a DeleteGraphFromNode out of " + argsBox.nodes.size() + " nodes");
545550
}
546551
return new DeleteGraphFromNode(argsBox.nodes.get(0));
552+
} else if (command.equalsIgnoreCase(DeleteLeaf.LABEL)) {
553+
if (argsBox.nodes.size() != 1) {
554+
throw new SsurgeonParseException("Cannot make a DeleteLeaf out of " + argsBox.nodes.size() + " nodes");
555+
}
556+
return new DeleteLeaf(argsBox.nodes.get(0));
547557
} else if (command.equalsIgnoreCase(EditNode.LABEL)) {
548558
if (argsBox.nodes.size() != 1) {
549559
throw new SsurgeonParseException("Cannot make an EditNode out of " + argsBox.nodes.size() + " nodes");

test/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonTest.java

+38
Original file line numberDiff line numberDiff line change
@@ -1721,6 +1721,44 @@ public void readXMLOneStepReattach() {
17211721
assertEquals(newSg, expected);
17221722
}
17231723

1724+
1725+
/**
1726+
* Test deleteLeaf, which removes an unwanted leaf and its edges, then renumbers everything
1727+
*<br>
1728+
* Uses a real example from UD_Portuguese-GSD
1729+
*/
1730+
@Test
1731+
public void readXMLDeleteLeaf() {
1732+
String doc = String.join(newline,
1733+
"<ssurgeon-pattern-list>",
1734+
" <ssurgeon-pattern>",
1735+
" <uid>38</uid>",
1736+
" <notes>Test deleting a leaf (only if it's a leaf)</notes>",
1737+
" <language>UniversalEnglish</language>",
1738+
// the real life example used POS tags to make sure "verb" and "clitic" are the right pieces
1739+
" <semgrex>" + XMLUtils.escapeXML("{}=verb . ({word:/-/}=dash . {word:se}=clitic)") + "</semgrex>",
1740+
" <edit-list>combineMWT -node verb -node dash -node clitic</edit-list>",
1741+
" <edit-list>deleteLeaf -node dash</edit-list>",
1742+
" </ssurgeon-pattern>",
1743+
"</ssurgeon-pattern-list>");
1744+
Ssurgeon inst = Ssurgeon.inst();
1745+
List<SsurgeonPattern> patterns = inst.readFromString(doc);
1746+
assertEquals(patterns.size(), 1);
1747+
SsurgeonPattern pattern = patterns.get(0);
1748+
1749+
// the dash should be removed and all words with an index after the dash should have that index decremented
1750+
SemanticGraph sg = SemanticGraph.valueOf("[nobre-6 nmod> [decreto-9 case> com-7 det> o-8] cop> fez-3 punct> --4 expl:pv> [se-5 advmod> [Assim punct> ,-2]]]");
1751+
SemanticGraph expected = SemanticGraph.valueOf("[nobre-5 nmod> [decreto-8 case> com-6 det> o-7] cop> fez-3 expl:pv> [se-4 advmod> [Assim punct> ,-2]]]");
1752+
SemanticGraph newSg = pattern.iterate(sg).first;
1753+
assertEquals(newSg, expected);
1754+
1755+
// here, the dash isn't a leaf any more, so it shouldn't be deleted
1756+
sg = SemanticGraph.valueOf("[nobre-6 nmod> [decreto-9 case> com-7 det> o-8] cop> fez-3 punct> [--4 expl:pv> [se-5 advmod> [Assim punct> ,-2]]]]");
1757+
expected = SemanticGraph.valueOf("[nobre-6 nmod> [decreto-9 case> com-7 det> o-8] cop> fez-3 punct> [--4 expl:pv> [se-5 advmod> [Assim punct> ,-2]]]]");
1758+
newSg = pattern.iterate(sg).first;
1759+
assertEquals(newSg, expected);
1760+
}
1761+
17241762
/**
17251763
* Simple test of an Ssurgeon edit script. This instances a simple semantic graph,
17261764
* a semgrex pattern, and then the resulting actions over the named nodes in the

0 commit comments

Comments
 (0)