-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfixation_dispersion.R
219 lines (166 loc) · 8.64 KB
/
fixation_dispersion.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#' Fixation detection using a dispersion method
#'
#' Detects fixations by assessing dispersion of the eye position, using a method that is similar to that proposed by Salvucci & Goldberg (1996).
#' Evaluates the maximum dispersion (distance) between x/y coordinates across a window of data. Looks for sufficient periods
#' in which this maximum dispersion is below the specified dispersion tolerance. NAs are considered breaks
#' in the data and are not permitted within a valid fixation period.
#'
#' It can take either single participant data or multiple participants where there is a variable for unique participant identification.
#' The function looks for an identifier named `participant_ID` by default and will treat this as multiple-participant data as default,
#' if not it is handled as single participant data, or the participant_ID needs to be specified
#'
#' @param data A dataframe with raw data (time, x, y, trial) for one participant (the standardised raw data form for eyetools)
#' @param min_dur Minimum duration (in milliseconds) of period over which fixations are assessed
#' @param disp_tol Maximum tolerance (in pixels) for the dispersion of values allowed over fixation period
#' @param NA_tol the proportion of NAs tolerated within any window of samples that is evaluated as a fixation
#' @param progress Display a progress bar
#' @param participant_ID the variable that determines the participant identifier. If no column present, assumes a single participant
#' @return a dataframe containing each detected fixation by trial, with mean x/y position in pixel, start and end times, and duration.
#' @export
#' @examples
#' \donttest{
#' data <- combine_eyes(HCL)
#' fixation_dispersion(data, participant_ID = "pNum")
#' }
#'
#' @importFrom utils tail
#' @importFrom pbapply pblapply
#'
#' @references Salvucci, D. D., & Goldberg, J. H. (2000). Identifying fixations and saccades in eye-tracking protocols. Proceedings of the Symposium on Eye Tracking Research & Applications - ETRA '00, 71–78.
fixation_dispersion <- function(data, min_dur = 150, disp_tol = 100, NA_tol = .25, progress = TRUE, participant_ID = "participant_ID") {
#first check for multiple/single ppt data
test <- .check_ppt_n_in(participant_ID, data)
participant_ID <- test[[1]]
data <- test[[2]]
internal_fixation_dispersion <- function(data, min_dur, disp_tol, NA_tol, progress) {
ppt_label <- data[[participant_ID]][1]
data <- split(data,data$trial) # create list from data by trial
# present a progress bar, unless set to false
if (progress) {
data_fix <- pbapply::pblapply(data, trial_level_process, min_dur, disp_tol, NA_tol)
} else {
data_fix <- lapply(data, trial_level_process, min_dur, disp_tol, NA_tol)
}
# take the list data and bind into a single dataframe.
data_fix <- do.call("rbind", data_fix)
data_fix <- as.data.frame(data_fix)
# tidy columns
colnames(data_fix) <- c("trial", "fix_n", "start", "end", "duration", "x", "y",
"prop_NA", "min_dur", "disp_tol")
###if x and y are all NA, return NA as a fixation
if (sum(!is.na(as.numeric(data_fix$x))) == 0 || sum(!is.na(as.numeric(data_fix$y))) == 0) {
trial_fix_store <- matrix(NA,1,7)
}
data_fix <- cbind(ppt_label, data_fix)
colnames(data_fix)[1] <- participant_ID
#row.names(data_fix) <- NULL # remove the row names
return(data_fix)
}
# this is the trial level process that runs on the data for a single trial within the main algorithm
trial_level_process <- function(data, min_dur, disp_tol, NA_tol) {
trial <- data$trial[1]
#if no observations for x or y at all
if (sum(!is.na(data$x)) == 0 || sum(!is.na(data$y)) == 0) {
trial_fix_store <- matrix(NA,1,7)
trial_fix_store <- cbind(trial_fix_store, min_dur) # add param setting
trial_fix_store <- cbind(trial_fix_store, disp_tol) # add param setting
trial_fix_store <- cbind(trial, trial_fix_store) # add trial number
return(trial_fix_store) # returns the fixations for that trial to the main algorithm
} else {
data$time <- data$time - data$time[1] # start trial timestamp values at 0
#get first row number where x and y is NOT NA
min_x <- min(which(!is.na(data$x)))
min_y <- min(which(!is.na(data$y)))
#then get max row number
max_x <- max(which(!is.na(data$x)))
max_y <- max(which(!is.na(data$y)))
#remove the leading and trailing NAs
data <- data[min(min_x, min_y):max(max_x, max_y),]
data$fix_num <- NA # add a column that stores the fix number
first_ts <- 1 # first timestamp of window
last_ts <- 1 # allows step into the loop
fix_cnt <- 1
new_window <- TRUE
while (last_ts <= nrow(data)) { # while window is within limits of data
if (new_window == TRUE){
future_ts <- which(data$time >= data$time[first_ts] + min_dur)
last_ts <- future_ts[1] #gets the earliest timestamp from all future valid ts
if (is.na(last_ts)){
break # last time stamp not valid (beyond window)
}
win <- data[first_ts:last_ts,] # the window of trials to evaluate
if (mean(is.na(win$x)) < NA_tol) { # if within the tolerance of NA_tol
max_d_win <- max(dist(win[,c("x", "y")]),na.rm = TRUE) # get max dispersion across this new window
if (is.infinite(max_d_win)) {
stop("is infinite")
}
} else {
# window has too many NA, so shift along
max_d_win <- disp_tol + 1 # artificially make this not a fixation
}
if(max_d_win <= disp_tol){
# start of a fixation
data$fix_num[first_ts:last_ts] <- fix_cnt
# print(fix_cnt)
new_window = FALSE # not a new window; look to extend fixation
} else {
# looking for the start of a new fixation
# shift window along 1 timestamp
first_ts <- first_ts + 1
last_ts <- last_ts + 1
}
} else { # extend the window
# increase the size of the window by a single timestamp
last_ts <- last_ts + 1
# compute the new distances from this new data point
xy_data <- data[last_ts,c("x", "y")]
xy_win <- win[,c("x", "y")]
max_d_new_data <- max(sqrt((xy_win$x - xy_data$x)^2 + (xy_win$y - xy_data$y)^2))
if (is.na(max_d_new_data) || max_d_new_data >= disp_tol) {
# either NA detected, or
# the addition of data point broke the dispersion threshold
# so make this last data point the first one for a new window
new_window <- TRUE
first_ts <- last_ts
fix_cnt <- fix_cnt + 1 # next fixation
} else { # otherwise this can be included in last fixation
data$fix_num[last_ts] <- fix_cnt # add current fixation number to this timestamp
win <- data[first_ts:last_ts,] # update the window to include this data point
}
}
}
# function to extract summary information from fixations
summarise_fixations <- function(data){
start <- as.numeric(data$time[1]) # first timestamp
end <- as.numeric(data$time[nrow(data)]) # last timestamp
dur <- end-start
mean_x <- as.numeric(round(mean(data$x, na.rm = TRUE)),digits = 0)
mean_y <- as.numeric(round(mean(data$y, na.rm = TRUE)),digits = 0)
prop_NA <- as.numeric(round(mean((is.na(data$x) | is.na(data$y))),digits = 3))
return(c(start, end, dur, mean_x, mean_y, prop_NA))
}
# print(data[1,4])
# get trial summary of fixations
if ((sum(is.na(data$fix_num)) == nrow(data)) == FALSE){
data_s <- split(data,data$fix_num) # create list based on the fixation number from data
trial_fix_store <- t(sapply(data_s, summarise_fixations))
trial_fix_store <- cbind(1:nrow(trial_fix_store), trial_fix_store) #fixation number
}
else {
# no fixations detected - write NAs
trial_fix_store <- matrix(NA,1,7)
}
trial_fix_store <- cbind(trial_fix_store, min_dur) # add param setting
trial_fix_store <- cbind(trial_fix_store, disp_tol) # add param setting
trial_fix_store <- cbind(trial, trial_fix_store) # add trial number
#trial_fix_store <- cbind(ppt_label, trial_fix_store)
return(trial_fix_store) # returns the fixations for that trial to the main algorithm
}
}
data <- split(data, data[[participant_ID]])
out <- lapply(data, internal_fixation_dispersion, min_dur, disp_tol, NA_tol, progress)
out <- do.call("rbind.data.frame", out)
rownames(out) <- NULL
out <- .check_ppt_n_out(out)
return(out)
}