-
Notifications
You must be signed in to change notification settings - Fork 9
/
ernst-underreview.bib
95 lines (91 loc) · 4.9 KB
/
ernst-underreview.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Michael Ernst's submitted (under review) papers
%%%
%%% Tellina: translating natural-language task descriptions into shell commands.
@Misc{LinWPVZE,
  author       = "Xi Victoria Lin and Chenglong Wang and Deric Pang and Kevin Vu and Luke Zettlemoyer and Michael D. Ernst",
  title        = "Program synthesis from natural language using recurrent neural networks",
  month        = feb,
  year         = 2018,
  NEEDcrossref = "*",
  NEEDpages    = "*",
  abstract     =
    "Oftentimes, a programmer may
     have difficulty implementing a desired operation.
     Even when the programmer can
     describe the goal in English, it can be difficult to translate into code.
     Existing resources, such as question-and-answer websites, tabulate specific
     operations that someone has wanted to perform in the past, but they are not
     effective in generalizing to new tasks, to compound tasks that require
     combining previous questions, or sometimes even to variations of listed
     tasks.
     \par
     Our goal is to make programming easier and more productive by letting
     programmers use their own words and concepts to express the intended operation,
     rather than forcing them
     to accommodate the machine by memorizing its grammar.
     We have built a system that lets a programmer describe a desired operation
     in natural language, then automatically translates it
     to a
     programming language
     for review and approval by the programmer.
     Our system, Tellina, does the translation using recurrent neural networks
     (RNNs), a state-of-the-art natural language processing technique that we
     augmented with slot (argument) filling and other enhancements.
     \par
     We evaluated Tellina in the context of shell scripting. We trained
     Tellina's RNNs on textual descriptions of file system operations and bash
     one-liners, scraped from the web. Although recovering completely correct
     commands is challenging, Tellina achieves top-3 accuracy of 80\% for producing
     the correct
     command structure. In a controlled study,
     programmers who had
     access to Tellina outperformed those who did not, even when Tellina's
     predictions were not completely correct,
     to a statistically significant degree.",
  omitfromcv   = 1,
  underreview  = 1,
  basefilename = "nl-to-bash",
  category     = "Natural language processing",
  csetags      = "xilin,clwang,dericp,lsz,mernst,mernst-Natural-language-processing,plse",
  summary      =
    "Machine translation can translate one human language to another. Tellina
     applies it to a new domain: translating human language to computer programs.",
}
%%% Fault detection vs. test adequacy criteria vs. test set size.
@Misc{ChenGTEHFAJ,
  author       = "Yiqun Chen and Rahul Gopinath and Anita Tadakamalla and Michael D. Ernst and Reid Holmes and Gordon Fraser and Paul Ammann and Ren{\'e} Just",
  authorASCII  = "Yiqun Chen and Rahul Gopinath and Anita Tadakamalla and Michael D. Ernst and Reid Holmes and Gordon Fraser and Paul Ammann and Rene Just",
  title        = "Revisiting the relationship between fault detection, test adequacy criteria, and test set size",
  month        = may,
  year         = 2020,
  NEEDcrossref = "*",
  NEEDpages    = "*",
  abstract     =
    "The research community has long recognized a complex interrelationship between
     test set size, test adequacy criteria, and test effectiveness in terms of fault
     detection. However, there is substantial confusion about the role and importance
     of controlling for test set size when assessing and comparing test adequacy
     criteria. This paper makes the following contributions:
     (1) A review of contradictory analyses of the relationship between fault
     detection, test suite size, and test adequacy criteria. Specifically, this paper
     addresses the supposed contradiction of prior work and explains why test suite
     size is neither a confounding variable, as previously suggested, nor an
     independent variable that should be experimentally manipulated.
     (2) An explication and discussion of the experimental design and sampling
     strategies of prior work, together with a discussion of conceptual and
     statistical problems, and specific guidelines for future work.
     (3) A methodology for comparing test-adequacy criteria on an equal basis, which
     accounts for test suite size by treating it as a covariate.
     (4) An empirical evaluation that compares the effectiveness of coverage-based
     and mutation-based testing to one another and random testing. Additionally,
     this paper proposes probabilistic coupling, a methodology for approximating the
     representativeness of a set of test goals for a given set of real faults.",
  omitfromcv   = 1,
  underreview  = 1,
  basefilename = "test-set-size",
  category     = "Testing",
  csetags      = "mernst,mernst-Testing,plse",
  summary      =
    "Previous research reported both that test set size almost entirely explains
     fault detection, and that test set size plays no role. This paper resolves
     the contradiction and gives correct experimental and statistical methodology.",
}