17/9/17 HW1
2.61
A !(x ^ ~0)
B !(x ^ 0)
C !((x & 0xFF) ^ 0xFF)
D !(x >> ((sizeof(int) - 1) << 3) ^ 0)
2.62
bool int_shifts_are_arithmetic() {
int x = 0; //需要int型x
return !((~x >> 1) ^ ~x); //通过对右移后首位进行判断
}
2.65
int odd_ones(unsigned x) {
x ^= (x >> 16); //对比前一半和后一半,异或得到的1的个数的奇偶性与原数的1的个数的奇偶性相同
x ^= (x >> 8);
x ^= (x >> 4);
x ^= (x >> 2);
x ^= (x >> 1);
return x & 1;
}
第一次作业反馈
第一题:正确
第二题:正确
第三题:正确
17/9/23 HW2
2.88
A B
1 01110 001 -9/16 1 0110 0010 -9/16
0 10110 101 208 0 1110 1010 208
1 00111 110 -7/1024 1 0000 0111 -7/1024
0 00000 101 5/2^17 0 0000 0001 1/2^10
1 11011 000 -2^12 1 1110 1111 -248
0 11000 100 768 0 1111 0000 正无穷
2.92
float_bite float_negate(float_bite f) {
if ((f & 0x7fffffff) > 0x7f800000) { //NaN直接返回f
return f;
}
else { //否则均将符号位取反返回
return f ^ 0x80000000;
}
}
2.96
int float_f2i(float_bite f) {
unsigned signal = f & 0x80000000;
unsigned exp = (f >> 23) & 0xff - 0x7f;
unsigned res = f & 0x7fffff + 0x800000;
if (exp > 31) return 0x80000000; //如果指数大于31,必然溢出,返回0x80000000 //应为exp > 30,31会溢出至符号位
else if (exp < -1) return 0; //如果指数小于-1,必然舍入到0,返回0
else if (exp >= 23) { //如果指数在23到31之间,只需把res(frac)部分左移若干位即可
res = res << (exp - 23);
}
else {
unsigned fg = 1;
int n = 22 - exp;
for (int i = 0; i < n; i++) fg = fg << 1 + 1; //制造长度为n = 23 - exp的都为1的串,用来取出需要舍入的若干位
unsigned rounding = (res & fg) > (1 << n); //判断舍入时,取出若干位,大于一半直接进位
if (!((res & (fg << 1 + 1)) ^ (3 << n))) rounding = 1; //如果恰好一半且末位为1,需要进位
res = res >> (n + 1);
if (rounding) res = res + 1;
}
if (signal == 0) return res;
else return -res; //符号位为1时返回相反数
}
第二次作业反馈
第一题:正确
第二题:正确
第三题:有误
17/10/01 HW3
3.60
A x in %rdi
n in %ecx, %esi
result in %rax
mask in %rdx
B result = 0
mask = 1
C mask != 0
D mask = mask << (n & 0xff)
E result = result | (x & mask)
F
long loop(long x, int n)
{
long result = 0;
long mask;
for (mask = 1; mask != 0; mask = mask << (n & 0xff)) {
result |= (x & mask);
}
return result;
}
/* 有一个问题,在汇编代码中,n被移入%ecx之后,在第10行
salq %c1, %rdx 中,只用了n的后八位。但根据逻辑,n的值
不可能很大,不然无法进行左移操作,所以需要把D写成
mask = mask << (n & 0xff) 吗? */
3.62
/* Enumerated type creates set fo constants numbered 0 and upward */
typedef enum {MODE_A, MODE_B, MODE_C, MODE_E} mode_t;
long switch3(long *p1, long *p2, mode_t action)
{
long result = 0;
switch(action) {
case MODE_A:
result = *p2;
action = *p1;
*p2 = action;
break;
case MODE_B:
result = *p1;
result += *p2;
*p1 = result;
break;
case MODE_C:
*p1 = 59;
result = *p2;
break;
case MODE_D:
result = *p2;
*p1 = result;
result = 27;
break;
case MODE_E:
result = 27;
break;
default:
result = 12;
}
return result;
} //需要将MODE_A到MODE_E强制类型转换到mode_t
3.64
long store_ele(long i, long j, long k, long *dest)
i in %rdi, j in %rsi, k in %rdx, dest in %rcx
store_ele:
leaq (%rsi, %rsi, 2), %rax // %rax = 3 * j
leaq (%rsi, %rax, 4), %rax // %rax = 13 * j
movq %rdi, %rsi // %rsi = i
salq $6, %rsi // %rsi = i << 6 = 64 * i
addq %rsi, %rdi // %rdi = 65 * i
addq %rax, %rdi // %rdi = 65 * i + 13 * j
addq %rdi, %rdx // %rdx = 65 * i + 13 * j + k
movq A(,%rdx,8), %rax
movq %rax, (%rcx) // *dest = A + %rdx
movl $3640, %eax // result = 3640
ret
A &Ai[k] = Xa + L * (k + T * (j + S * i)) //问一下,那个公式3.1在哪里??
B 65 * i + 13 * j + k = k + T * (j + S * i)
3640 = 8 * R * S * T
解得 R = 7, S = 5, T = 13
第三次作业反馈
第一题:正确
第二题:有误
第三题:正确
20171012 HW4
3.68
typedef struct {
int x[A][B];
long y;
} str1;
typedef struct {
char array[B];
int t;
short s[A];
long u;
} str2;
void setVal(str1 *p, str2 *q) {
long v1 = q->t;
long v2 = q->u;
p->y = v1 + v2;
}
void setVal(str1 *p, str2 *q)
p in %rdi, q in %rsi
setVal:
movslq 8(%rsi) %rax //B在(4,8]间,此处的8很可能是对齐所致,
addq 32(%rsi) %rax //32 - 8 - 4 = 20,但因为最后u为long型,A的值可能是9或10
movq %rax 184(%rdi) //若不对齐,A * B = 46,无法分解;对齐的话A * B = 45 = 5 * 9
ret //所以A = 9, B = 5
A = 9, B = 5
3.69
typedef struct {
int first;
a_struct a[CNT];
int last;
} b_struct;
void test(long i, b_struct *bp)
{
int n = bp->first + bp->last;
a_struct *ap = &bp->a[i];
ap->x[ap->idx] = n;
}
void test(long i, b_struct *bp)
i in %rdi, bp in %rsi
:
mov 0x120(%rsi) %ecx
add (%rsi) %ecx
lea (%rdi,%rdi,4) %rax
lea (%rsi,%rax,8) %rax
mov 0x8(%rax) %rdx
movslq %ecx %rcx
mov %rcx 0x10(%rax,%rdx,8)
retq
A.
由第五行,0x8(%rax),idx放在a_struct的头部,first对齐到8
所以CNT个a_struct的大小为0x120 - 8 = 0x118 = 280
%rax中存的是bp + 40 * i,所以一个a_struct的大小为40
所以CNT = 7
B.
%rdx中存的是idx,数组x中的每个元素大小为8,x有符号
所以x的类型是long
第五行中,mov将idx存入%rdx,没有转化的步骤,所以idx的类型也是long
typedef struct {
long idx;
long x[4];
} a_struct;
3.70
union ele {
struct {
long *p;
long y;
} e1;
struct {
long x;
union ele *next;
} e2;
}; //联合中可以嵌套结构
A.
e1.p 0
e1.y 8
e2.x 0
e2.next 8
B.
总共需要16字节
C.
void proc (union ele *up)
up in %rdi
proc:
movq 8(%rdi) %rax //%rax = up->e2.next
movq (%rax) %rdx //%rdx = up->e2.next->e1.p
movq (%rdx) %rdx
subq 8(%rax) %rdx //%rdx - 8(%rax)
movq %rdx (%rdi) //等号
ret
void proc (union ele *up) {
up->e2.x = *(up->e2.next->e1.p) - up->e2.next->e1.y;
}
第四次作业反馈
第一题:正确
第二题:正确
第三题:正确
20171030 HW5
4.47
/* Bubble sort: Array version */
void bubble_a(long *data, long count) {
long i, last;
for (last = count - 1; last > 0; last--) {
for (i = 0; i < last; i++)
if (data[i + 1] < data[i]) {
/* Swap adjacent elements */
long t = data[i + 1];
data[i + 1] = data[i];
data[i] = t;
}
}
}
A.
void bubble_a(long *data, long count) {
long* i, last;
for (last = data + count - 1; last != data; last--) {
for (i = data; i != last; i++)
{
if (*(i + 1) < *i) {
/* Swap adjacent elements */
long tmp = *(i + 1);
*(i + 1) = *i;
*i = tmp;
}
}
}
}
B.
pushl %ebp
rrmovl %esp, %ebp
rmmovl %edi, -0x28(%ebp)
rmmovl %esi, -0x30(%ebp)
mrmovl -0x30(%ebp), %eax
addl %eax, %eax
addl %eax, %eax
addl %eax, %eax
irmovl $0x8, %ecx
subl %ecx, %eax
rrmovl %eax, %edx
mrmovl -0x28(%ebp), %eax
addl %edx, %eax
rmmovl %eax, -0x10(%ebp)
jmp test1
loop1:
mrmovl -0x28(%ebp), %eax
rmmovl %eax, -0x18(%ebp)
jmp test2
loop2:
mrmovl -0x18(%ebp), %eax
addl %ecx, %eax
mrmovl (%eax), %edx
mrmovl -0x18(%ebp), %eax
mrmovl (%eax), %eax
rrmovl %edx, %ebx
subl %eax, %edx
jge NoSwap
mrmovl -0x18(%ebp), %eax
mrmovl 0x8(%eax), %eax
rmmovl %eax, -0x8(%ebp)
mrmovl -0x18(%ebp), %eax
irmovl $0x8, %ecx
addl %ecx, %eax
mrmovl (%eax), %edx
mrmovl -0x18(%ebp), %eax
mrmovl (%eax), %eax
rmmovl %eax, (%edx)
mrmovl -0x18(%ebp), %eax
mrmovl -0x8(%ebp), %edx
rmmovl %edx, (%eax)
NoSwap:
mrmovl -0x18(%ebp), %eax
irmovl $0x8, %ecx
addl %ecx, %eax
rmmovl %eax, -0x18(%ebp)
test2:
mrmovl -0x10(%ebp), %eax
mrmovl -0x18(%ebp), %edx
subl %eax, %edx
jne loop2
irmovl $0x8, %ecx
subl %ecx, %eax
rmmovl %eax, -0x10(%ebp)
test1:
mrmovl -0x10(%ebp), %eax
mrmovl -0x28(%ebp), %edx
subl %edx, %eax
jne loop1
popl %ebp
ret
4.56
int f_pc = [
# Mispredicted branch. Fetch at incremented PC
M_icode == IJXX && M_valE < M_valA && !M_Cnd : M_valA;
M_icode == IJXX && M_valE > M_valA && M_Cnd : M_valE; //不考虑M_valE == M_valA,感觉无意义
# Completion of RET instruction.
W_icode == IRET : W_valM;
# Default: Use predicted value of PC
1 : F_predPC;
];
int f_predPC = [
# BBTFNT: This is where you'll change the branch prediction rule
f_icode in { ICALL } : f_valC;
f_icode in { IJXX } && ( f_ifun == 0 || f_valC < f_valP ) : f_valC;
1 : f_valP;
];
int aluA = [
E_icode in { IRRMOVL, IOPL } : E_valA;
E_icode in { IIRMOVL, IRMMOVL, IMRMOVL, IJXX } : E_valC;
E_icode in { ICALL, IPUSHL } : -4;
E_icode in { IRET, IPOPL } : 4;
# Other instructions don't need ALU
];
int aluB = [
E_icode in { IRMMOVL, IMRMOVL, IOPL, ICALL,
IPUSHL, IRET, IPOPL } : E_valB;
E_icode in { IRRMOVL, IIRMOVL, IJXX } : 0;
# Other instructions don't need ALU
];
bool D_bubble =
# Mispredicted branch
(E_icode == IJXX && ((E_valC < E_valA && !e_Cnd) || (E_valC > E_valA && e_Cnd))) ||
# BBTFNT: This condition will change
# Stalling at fetch while ret passes through pipeline
# but not condition for a load/use hazard
!(E_icode in { IMRMOVL, IPOPL } && E_dstM in { d_srcA, d_srcB }) &&
IRET in { D_icode, E_icode, M_icode };
bool E_bubble =
# Mispredicted branch
(E_icode == IJXX && ((E_valC < E_valA && !e_Cnd) || (E_valC > E_valA && e_Cnd))) ||
# BBTFNT: This condition will change
# Conditions for a load/use hazard
E_icode in { IMRMOVL, IPOPL } &&
E_dstM in { d_srcA, d_srcB};
5.13
/* Inner product. Accumulate in temporary */
void inner4(vec_ptr u, vec_ptr v, data_t *dest)
{
long i;
long length = vec_length(u);
data_t *udata = get_vec_start(u);
data_t *vdata = get_vec_start(v);
data_t sum = (data_t) 0;
for (i = 0; i < length; i++) {
sum = sum + udata[i] * vdata[i];
}
*dest = sum;
}
Inner loop of inner4. data_t = double, OP = *
udata in %rbp, vdata in %rax, sum in %xmm0
i in %rcx, limit in %rbx
.L15:
loop:
vmovsd 0(%rbp, %rcx, 8), %xmm1 Get udata[i]
vmulsd (%rax, %rcx, 8), %xmm1, %xmm1 Multiply by vdata[i]
vaddsd %xmm1, %xmm0, %xmm0 Add to sum
addq $1, %rcx Increment i
cmpq %rbx, %rcx Compare i:limit
jne .L15 If !=, goto loop
A.
图片发给助教吧。。。。不会画。。。
最左边的浮点数加法链不断更新寄存器%xmm0储存的值,为程序的关键路径。
B. 3
C. 1
D. 需要的浮点数乘法操作不在关键路径中,故可以按流水线进行,这样限制因素就是吞吐量界限。而整个程序的限制因素最后为浮点加法的延迟,所以就是3.0了。
5.14
void inner4(vec_ptr u, vec_ptr v, data_t *dest)
{
long i;
long length = vec_length(u);
data_t *udata = get_vec_start(u);
data_t *vdata = get_vec_start(v);
data_t sum = (data_t) 0;
for (i = 0; i + 6 < length; i = i + 6) {
sum = sum + udata[i]*vdata[i] + udata[i+1]*vdata[i+1] + udata[i+2]*vdata[i+2]
+ udata[i+3]*vdata[i+3] + udata[i+4]*vdata[i+4] + udata[i+5]*vdata[i+5];
}
for (; i < length; i++) sum = sum + udata[i]*vdata[i];
*dest = sum;
}
A. 因为只有两个加载单元,每个周期只能加载两个数据,这成为了程序最主要的限制因素,所以1.00为CPE的下界。
B. 因为浮点加法单元只有一个,循环展开并不减短关键路径,限制因素仍为若干次的浮点加法,所以没有提高。
第五次作业反馈
第一题:Y86有点小问题
第二题:正确
第三题:少画了个图
第四题:正确(i+5<length比较好)
20171120 HW6
7.6
符号 swap.o.symtab条目 符号类型 定义符号的模块 节
buf 有 外部 m.o .data
bufp0 有 全局 swap.o .data
bufp1 有 局部 swap.o .bss
swap 有 全局 swap.o .text
temp 无
incr 有 局部 swap.o .text
count 有 局部 swap.o .bss
7.7
/* foo5.c */
#include <stdio.h>
void f(void);
int y = 15212;
int x = 15213;
int main()
{
f();
printf("x = 0x%x y = 0x%x \n",
x, y);
return 0;
}
/* bar5.c */
static double x;
void f()
{
x = -0.0;
}
7.12
A.
refaddr = ADDR(s) + r.offset;
*refptr = (unsigned) (ADDr(r.symbol) + r.addend - refaddr);
ADDR(s) = 0x4004e0, refaddr = 0x4004ea;
*refptr = 0x4004f8 - 4 - 0x4004ea = 0xa
所以
9: e8 0a 00 00 00 callq 0x4004f8 <swap>
B.
同理可得
9: e8 22 00 00 00 callq 0x400500 <swap>
7.13
A.
libc.a包含1579个.o文件,libm.a包含471个.o文件。
B.
没什么不同吧。。反汇编出来是一样的。。。
C.
linux-vdso.so.1 => (0x00007ffced5d8000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f116aa2b000)
/lib64/ld-linux-x86-64.so.2 (0x000055d98150b000)
Version information:
/usr/bin/gcc:
ld-linux-x86-64.so.2 (GLIBC_2.3) => /lib64/ld-linux-x86-64.so.2
libc.so.6 (GLIBC_2.3) => /lib/x86_64-linux-gnu/libc.so.6
libc.so.6 (GLIBC_2.14) => /lib/x86_64-linux-gnu/libc.so.6
libc.so.6 (GLIBC_2.4) => /lib/x86_64-linux-gnu/libc.so.6
libc.so.6 (GLIBC_2.11) => /lib/x86_64-linux-gnu/libc.so.6
libc.so.6 (GLIBC_2.2.5) => /lib/x86_64-linux-gnu/libc.so.6
libc.so.6 (GLIBC_2.3.4) => /lib/x86_64-linux-gnu/libc.so.6
/lib/x86_64-linux-gnu/libc.so.6:
ld-linux-x86-64.so.2 (GLIBC_2.3) => /lib64/ld-linux-x86-64.so.2
ld-linux-x86-64.so.2 (GLIBC_PRIVATE) => /lib64/ld-linux-x86-64.so.2
所以共享库是libc.so.6
第一题:有一处错误
第二题:正确
第三题:正确
第四题:正确
20171122 HW7
8.9
进程 开始时间 结束时间
A 5 7
B 2 4
C 3 6
D 1 8
进程对 并发?
AB 不是
AC 是
AD 是
BC 是
BD 是
CD 是
8.18
Start 父 父1 子0 子(atexit) 父12 子02
所以从左到右出现2的个数小于出现0和1的个数,ACE有可能出现。
8.24
#include "csapp.h"
#define N 2
int main()
{
int status, i;
pid_t pid;
char errorinfo[100];
/* Parent creates N children */
for (i = 0; i < N; i++)
if ((pid = Fork()) == 0) /* Child */
exit(100+i);
/* Parent reaps N children in no particular order */
while ((pid = waitpid(-1, &status, 0)) > 0) {
if (WIFEXITED(status))
printf("child %d terminated normally with exit status=%d\n",
pid, WEXITSTATUS(status));
else if (WIFSIGNALED(status)) {
printf("child %d terminated by signal %d: ",
pid, WTERMSIG(status));
psignal(WTERMSIG(status), errorinfo);
}
else
printf("child %d terminated abnormally\n", pid);
}
/* The only normal termination is if there are no more children */
if (errno != ECHILD)
unix_error("waitpid error");
exit(0);
}
10.6
输出是4。fd1打开时标记为3,fd2打开时标记为4,后关闭再打开,还是为4。
10.9
if (Fork() == 0) { /* child */
int fd = open("foo.txt", O_RDONLY, 0);
dup2(fd, 1);
close(fd);
Execve("fstatcheck", argv, envp);
}
第一题:正确
第二题:正确
第三题:错误
第四题:正确
第五题:有误
20171205 HW8
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <signal.h>
#include <sys/types.h>
#include <fcntl.h>
#include <errno.h>
#include <setjmp.h>
#include <stdarg.h>
#define UNIT ((unsigned long long) 1 << 12)
#define LIMIT 0xfffffffff
unsigned long long vpn;
unsigned long long va;
int flag;
jmp_buf env;
void handler(int sig)
{
//printf("SIGSEGV catched\n");
siglongjmp(env, 1);
return;
}
int main(int argc, char **argv)
{
signal(SIGSEGV, handler);
char tmp;
for (va = 0x400000, vpn = 0x400; vpn <= LIMIT; va += UNIT, vpn++)
{
int r = sigsetjmp(env, 1);
if (r == 0)
{
flag = 1;
tmp = *(char*)va;
}
else
{
flag = 0;
//printf("jmp\n");
}
if (flag == 1) fprintf(stdout, "VPN: %llx is reachable.\n", vpn);
}
return 0;
}
上述代码可以实现遍历,但运行时会在页号约达到0x1400之后卡住,分析原因是因为之后有大段的空白空间。想到的解决方法是在若干次连续访问失败之后加速跳过大量空白页,直到遇到可访问页再向回退一次减缓访问速度。问题是这个想法并不能保证程序的正确性,所以并没有实现。
所以说,这个程序可以实现遍历打印,就是可能时间会长一些。。。
20171217 HW9
11.8
在主函数中,变量定义之后添加一行Signal(SIGCHLD, sigchld_handler);(相关定义略去),并删去serve_dynamic函数中的wait(NULL);
其中函数代码如下:
/*
* sigchld_handler
*/
void sigchld_handler(int sig)
{
sigset_t mask_all, prev_mask;
pid_t pid;
Sigfillset(&mask_all);
while ((pid = waitpid(-1, NULL, 0)) > 0) {
Sigprocmask(SIG_BLOCK, &mask_all, &prev_mask);
deletejob(pid);
Sigprocmask(SIG_SETMASK, &prev_mask, NULL);
}
}
11.9
修改如下:
/*
* serve_static - copy a file back to the client
*/
/* $begin serve_static */
void serve_static(int fd, char *filename, int filesize)
{
int srcfd;
char *srcp, filetype[MAXLINE], buf[MAXBUF];
/* Send response headers to client */
get_filetype(filename, filetype); //line:netp:servestatic:getfiletype
sprintf(buf, "HTTP/1.0 200 OK\r\n"); //line:netp:servestatic:beginserve
sprintf(buf, "%sServer: Tiny Web Server\r\n", buf);
sprintf(buf, "%sConnection: close\r\n", buf);
sprintf(buf, "%sContent-length: %d\r\n", buf, filesize);
sprintf(buf, "%sContent-type: %s\r\n\r\n", buf, filetype);
Rio_writen(fd, buf, strlen(buf)); //line:netp:servestatic:endserve
printf("Response headers:\n");
printf("%s", buf);
/* Send response body to client */
srcfd = Open(filename, O_RDONLY, 0); //line:netp:servestatic:open
srcp = (char *)Malloc(filesize);
Rio_readn(srcfd, srcp, filesize);
Close(srcfd); //line:netp:servestatic:close
Rio_writen(fd, srcp, filesize); //line:netp:servestatic:write
free(srcp);
}
第一题:有误
第二题:正确