forked from hcs0/Hackers-Delight
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexpand.c.txt
144 lines (125 loc) · 4.44 KB
/
expand.c.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
/* Inverse of compress (right). */
#include <stdio.h>
/* Inverse of compress2 (HD page 151). This version has
no branches in the loop. Eight insns in the loop, giving
8*32 + 2 = 258 insns worst case. */
unsigned expand2(unsigned x, unsigned m) {
unsigned r, s, b; // Result, shift, mask bit.
r = 0;
s = 0;
do {
b = m & 1;
r = r | ((x & b) << s);
s = s + 1;
x = x >> b;
m = m >> 1;
} while (m != 0);
return r;
}
/* Inverse of compress4 (HD page 153).
I don't know a good way to do this. Seems like you
have to move the bits that go a long distance first, to
avoid overwriting. Maybe that problem could be solved by
moving a bit by first subtracting it where it came from,
and then adding it into the target location, with both
subtraction and addition done with xor. Not sure. But we
also have the problem of figuring out which bits move an
odd number of positions, which an odd multiple of 2,
etc. I don't know how to do that.
A solution from Joseph Allen first does part of the
compress right operation, to compute the five "move"
quantities mv, and saves them in an array. Then it
applies them to the data x in the reverse of the order in
which they were computed. Can one do better (mainly,
avoid saving lg(n) values)?
Allen's solution did not work if the irrelevant bits
are nonzero, but below is a modification of his program
that does work in that case.
The assignment to x in the second for-loop is doing
a MUX operation with mv selecting bits from either x or
x << (1 << i). I.e., letting y = x << (1 << i), it is
computing
(x & ~mv) | (y & mv)
by means of the equivalent expression (which saves an
instruction on a machine that doesn't have "and with
complement")
x ^ ((x ^ y) & mv). */
// ------------------------------ cut ----------------------------------
unsigned expand4(unsigned x, unsigned m) {
unsigned m0, mk, mp, mv, t;
unsigned array[5];
int i;
m0 = m; // Save original mask.
mk = ~m << 1; // We will count 0's to right.
for (i = 0; i < 5; i++) {
mp = mk ^ (mk << 1); // Parallel suffix.
mp = mp ^ (mp << 2);
mp = mp ^ (mp << 4);
mp = mp ^ (mp << 8);
mp = mp ^ (mp << 16);
mv = mp & m; // Bits to move.
array[i] = mv;
m = (m ^ mv) | (mv >> (1 << i)); // Compress m.
mk = mk & ~mp;
}
for (i = 4; i >= 0; i--) {
mv = array[i];
t = x << (1 << i);
x = (x & ~mv) | (t & mv);
// x = ((x ^ t) & mv) ^ x; // Alternative for above line.
}
return x & m0; // Clear out extraneous bits.
}
// ---------------------------- end cut --------------------------------
int errors;
void error(unsigned x, unsigned m, unsigned got, unsigned shdbe) {
errors = errors + 1;
printf("Error for x = %08X, m = %08x, got %08X, should be %08X\n",
x, m, got, shdbe);
}
int main(void)
{
int i, n;
unsigned r;
static unsigned test[] = {
// Data Mask Result
0x00000001, 0x80000000, 0x80000000, // These first 4 cases will
0x0000001F, 0x0010084A, 0x0010084A, // work with Allen's "scatter"
0x0000FFFF, 0x55555555, 0x55555555, // function, because the
0x00001FFF, 0x88E00F55, 0x88E00F55, // irrelevant HO bits are 0.
0xFFFFFFFF, 0x80000000, 0x80000000,
0xFFFFFFFF, 0x0010084A, 0x0010084A,
0xFFFFFFFF, 0x55555555, 0x55555555,
0xFFFFFFFF, 0x88E00F55, 0x88E00F55,
0x01234567, 0x0000FFFF, 0x00004567,
0x01234567, 0xFFFF0000, 0x45670000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0, 0, 0,
0, 0xFFFFFFFF, 0,
0xFFFFFFFF, 0, 0,
0x80000000, 0x80000000, 0,
0x55555555, 0x55555555, 0x11111111,
0x55555555, 0xAAAAAAAA, 0x22222222,
0x789ABCDE, 0x0F0F0F0F, 0x0B0C0D0E,
0x789ABCDE, 0xF0F0F0F0, 0xB0C0D0E0,
0x92345678, 0x80000000, 0,
0x12345678, 0xF0035555, 0x50021540,
0x80000000, 0xF0035555, 0,
};
n = sizeof(test)/sizeof(test[0]);
printf("expand2:\n");
for (i = 0; i < n; i += 3) {
r = expand2(test[i], test[i+1]);
if (r != test[i+2])
error(test[i], test[i+1], r, test[i+2]);
}
printf("expand4:\n");
for (i = 0; i < n; i += 3) {
r = expand4(test[i], test[i+1]);
if (r != test[i+2])
error(test[i], test[i+1], r, test[i+2]);
}
if (errors == 0)
printf("Passed all %d cases.\n", n/3);
return errors;
}