-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
65 lines (48 loc) · 2.53 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Getting and Cleaning Data Course Project
# Load and merge data -------------------------------------------------------
# Run this script from a working directory that contains the
# 'UCI HAR Dataset' folder.
train.dir <- file.path('UCI HAR Dataset', 'train')
test.dir <- file.path('UCI HAR Dataset', 'test')

# Training split: measurements (X), activity codes (y) and subject ids
x.train <- read.table(file.path(train.dir, 'X_train.txt'))
y.train <- read.table(file.path(train.dir, 'y_train.txt'))
subject.train <- read.table(file.path(train.dir, 'subject_train.txt'))

# Test split: the same three files
x.test <- read.table(file.path(test.dir, 'X_test.txt'))
y.test <- read.table(file.path(test.dir, 'y_test.txt'))
subject.test <- read.table(file.path(test.dir, 'subject_test.txt'))

# Lookup tables. The first column of features.txt becomes the row names,
# leaving a single 'feature' column with the measurement names.
feature.names <- read.table(
  file.path('UCI HAR Dataset', 'features.txt'),
  row.names = 1,
  col.names = c('row_id', 'feature')
)
activity.labels <- read.table(
  file.path('UCI HAR Dataset', 'activity_labels.txt'),
  col.names = c('levels', 'labels')
)

# Stack the training rows on top of the test rows; the same order is used for
# all three tables so that their rows stay aligned.
x <- rbind(x.train, x.test)
y <- rbind(y.train, y.test)
subject <- rbind(subject.train, subject.test)
# Extract only measurements on mean and std ------------------------------------
# Logical vector flagging the features whose name contains either mean() or
# std(). It is computed before the names are cleaned, because the cleaning
# below strips the parentheses the pattern relies on.
mask <- grepl('mean\\(\\)|std\\(\\)', feature.names$feature)
# Remove parentheses from feature names
feature.names$feature <- gsub('\\(|\\)', '', feature.names$feature)
# Replace commas and hyphens with periods
feature.names$feature <- gsub('-|,', '\\.', feature.names$feature)
# Make variable names lowercase. tolower() is vectorized, so the column stays
# a plain character vector (lapply() would turn it into a list column).
feature.names$feature <- tolower(feature.names$feature)
masked.features <- feature.names$feature[mask]
# Drop unused columns; drop = FALSE keeps x a data frame even when the mask
# selects a single column.
x <- x[, mask, drop = FALSE]
# Use descriptive variable names
colnames(x) <- masked.features
# Use descriptive activity names ----------------------------------------------
# Give the activity-code column a name so it can be referenced directly
names(y) <- 'level'
# Translate each activity code into its label using the lookup table
activity <- factor(
  y[['level']],
  levels = activity.labels[['levels']],
  labels = activity.labels[['labels']]
)
# Label the data set with descriptive variable names --------------------------
names(subject) <- 'subject_id'
# Attach the activity and subject columns to the measurements
x[['activity']] <- activity
x[['subject']] <- subject[['subject_id']]
# Average of each variable for each activity and each subject
library(reshape2)
# Long format: one row per (subject, activity, variable) observation.
# Arguments are spelled out in full rather than relying on partial matching.
xMelt <- melt(x, id.vars = c('subject', 'activity'),
              measure.vars = masked.features)
# Back to wide format, collapsing the repeated observations of each
# activity/subject pair into their mean.
tidy.wide <- dcast(xMelt, activity + subject ~ variable,
                   fun.aggregate = mean)
# Write data to disk without row names (FALSE rather than the reassignable F)
write.table(tidy.wide, 'tidy.txt', row.names = FALSE)