Skip to content

Commit

Permalink
Merge remote-tracking branch 'xilinx/dev' into feature/attention-streamline
Browse files Browse the repository at this point in the history
  • Loading branch information
iksnagreb committed Jan 20, 2025
2 parents 6c56382 + 88e207e commit 15a9daa
Show file tree
Hide file tree
Showing 115 changed files with 3,364 additions and 1,508 deletions.
1 change: 1 addition & 0 deletions .github/workflows/quicktest-dev-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,5 @@ jobs:
export FINN_ROOT=$(pwd)
export FINN_BUILD_DIR=/tmp/finn_gha
export FINN_INST_NAME=finn_gha
export FINN_SKIP_XRT_DOWNLOAD=1
./run-docker.sh quicktest
3 changes: 3 additions & 0 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,6 @@ sphinx:
python:
install:
- requirements: docs/requirements.txt

formats:
- pdf
3 changes: 3 additions & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,6 @@ Contributors
* Shashwat Khandelwal (@shashwat1198)
* Ian Colbert (@i-colbert)
* Rachit Garg (@rstar900)
* Christoph Berganski (@iksnagreb)
* Jonas Kuehle (@vopade)
* Aditya S (@Adityasrinivas24)
21 changes: 15 additions & 6 deletions docker/Dockerfile.finn
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ FROM ubuntu:jammy-20230126
LABEL maintainer="Jakoba Petri-Koenig <jakoba.petri-koenig@amd.com>, Yaman Umuroglu <yaman.umuroglu@amd.com>"

ARG XRT_DEB_VERSION="xrt_202220.2.14.354_22.04-amd64-xrt"
ARG SKIP_XRT
ARG LOCAL_XRT

WORKDIR /workspace

Expand Down Expand Up @@ -78,15 +80,19 @@ RUN cd verilator && \
make install

# install XRT
RUN wget https://www.xilinx.com/bin/public/openDownload?filename=$XRT_DEB_VERSION.deb -O /tmp/$XRT_DEB_VERSION.deb
RUN apt install -y /tmp/$XRT_DEB_VERSION.deb
RUN rm /tmp/$XRT_DEB_VERSION.deb
RUN if [ -z "$LOCAL_XRT" ] && [ -z "$SKIP_XRT" ];then \
wget -U 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17' "https://www.xilinx.com/bin/public/openDownload?filename=$XRT_DEB_VERSION.deb" -O /tmp/$XRT_DEB_VERSION.deb; fi

COPY requirements.txt $XRT_DEB_VERSION.* /tmp/

RUN if [ -z "$SKIP_XRT" ];then \
apt install -y /tmp/$XRT_DEB_VERSION.deb && \
rm /tmp/$XRT_DEB_VERSION.deb; fi

# versioned Python package requirements for FINN compiler
# these are given in requirements.txt
COPY requirements.txt .
RUN pip install -r requirements.txt
RUN rm requirements.txt
RUN pip install -r /tmp/requirements.txt
RUN rm /tmp/requirements.txt

# install PyTorch
RUN pip install torch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
Expand Down Expand Up @@ -126,6 +132,9 @@ RUN pip install tokenize-rt==4.2.1
# pyverilator
RUN pip install tclwrapper==0.0.1

# assure that we have the right setuptools version
RUN pip install setuptools==68.2.2

# extra environment variables for FINN compiler
ENV VIVADO_IP_CACHE "/tmp/vivado_ip_cache"

Expand Down
2 changes: 1 addition & 1 deletion docker/finn_entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ if [ -f "$VITIS_PATH/settings64.sh" ];then
source $XILINX_XRT/setup.sh
gecho "Found XRT at $XILINX_XRT"
else
recho "XRT not found on $XILINX_XRT, did the installation fail?"
recho "XRT not found on $XILINX_XRT, did you skip the download or did the installation fail?"
exit -1
fi
else
Expand Down
2 changes: 1 addition & 1 deletion docs/finn/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ Which data layout do FINN-generated accelerators use? Big-endian? Little-endian?
If you need to do this manually, first examine how the `FINN PYNQ Python drivers <https://github.com/Xilinx/finn-examples/blob/main/finn_examples/driver.py#L379>`_ do this – notice how the input data is
first reshaped to create the “folded input shape” that reflects the word size of the first layer based on how much it
was parallelized, then data packing is applied to obtain a raw byte array (with some reversals going on) that can be
fed directly to the hardware. Another example of this is the `npy_to_rtlsim_input <https://github.com/Xilinx/finn-base/blob/dev/src/finn/util/data_packing.py#L289>`_ function, which converts npy arrays to lists of Python arbitrary-precision integers that we feed into pyverilator for rtl simulation.
fed directly to the hardware. Another example of this is the `npy_to_rtlsim_input <https://github.com/Xilinx/finn/blob/dev/src/finn/util/data_packing.py#L284>`_ function, which converts npy arrays to lists of Python arbitrary-precision integers that we feed into pyverilator for rtl simulation.

Why does FIFO sizing take so long for my network? Is something wrong?
The automatic FIFO sizing in FINN can take quite long. It unfortunately doesn’t really parallelize on multiple cores since
Expand Down
4 changes: 2 additions & 2 deletions fetch-repos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

QONNX_COMMIT="1a4957ebf2aaf139217fd56109386d4518dd6127"
FINN_EXP_COMMIT="de99347e936d51715f5356a1b6c64e37b91c23c2"
BREVITAS_COMMIT="84f42259ec869eb151af4cb8a8b23ad925f493db"
FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851"
BREVITAS_COMMIT="d4834bd2a0fad3c1fbc0ff7e1346d5dcb3797ea4"
PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1"
CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4"
HLSLIB_COMMIT="16e5847a5e3ef76cffe84c8fad2f010d593457d3"
Expand Down
153 changes: 77 additions & 76 deletions finn-rtllib/fifo/hdl/Q_srl.v
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count, maxcount);
parameter depth = 16; // - greatest #items in queue (2 <= depth <= 256)
parameter width = 16; // - width of data (i_d, o_d)

parameter addrwidth = $clog2(depth);
localparam countwidth = $clog2(depth + 1);
localparam addrwidth = $clog2(depth);

input clock;
input reset;
Expand All @@ -89,10 +90,10 @@ module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count, maxcount);
input o_r; // - output stream ready
wire o_b; // - output stream back-pressure

output [addrwidth:0] count; // - output number of elems in queue
output [addrwidth:0] maxcount; // - maximum observed count since reset
output [countwidth-1:0] count; // - output number of elems in queue
output [countwidth-1:0] maxcount; // - maximum observed count since reset

reg [addrwidth:0] maxcount_reg; // - maximum count seen until now
reg [countwidth-1:0] maxcount_reg; // - maximum count seen until now
reg [addrwidth-1:0] addr, addr_, a_; // - SRL16 address
// for data output
reg shift_en_; // - SRL16 shift enable
Expand Down Expand Up @@ -183,58 +184,58 @@ module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count, maxcount);
end // always @ (posedge clock or negedge reset)

always @* begin // - combi always
srlo_ <= 'bx;
shift_en_o_ <= 1'bx;
shift_en_ <= 1'bx;
addr_ <= 'bx;
state_ <= 2'bx;
srlo_ = 'bx;
shift_en_o_ = 1'bx;
shift_en_ = 1'bx;
addr_ = 'bx;
state_ = 2'bx;
case (state)

state_empty: begin // - (empty, will not produce)
if (i_v) begin // - empty & i_v => consume
srlo_ <= i_d;
shift_en_o_ <= 1;
shift_en_ <= 1'bx;
addr_ <= 0;
state_ <= state_one;
srlo_ = i_d;
shift_en_o_ = 1;
shift_en_ = 1'bx;
addr_ = 0;
state_ = state_one;
end
else begin // - empty & !i_v => idle
srlo_ <= 'bx;
shift_en_o_ <= 0;
shift_en_ <= 1'bx;
addr_ <= 0;
state_ <= state_empty;
srlo_ = 'bx;
shift_en_o_ = 0;
shift_en_ = 1'bx;
addr_ = 0;
state_ = state_empty;
end
end

state_one: begin // - (contains one)
if (i_v && o_b) begin // - one & i_v & o_b => consume
srlo_ <= 'bx;
shift_en_o_ <= 0;
shift_en_ <= 1;
addr_ <= 0;
state_ <= state_more;
srlo_ = 'bx;
shift_en_o_ = 0;
shift_en_ = 1;
addr_ = 0;
state_ = state_more;
end
else if (i_v && !o_b) begin // - one & i_v & !o_b => cons+prod
srlo_ <= i_d;
shift_en_o_ <= 1;
shift_en_ <= 1;
addr_ <= 0;
state_ <= state_one;
srlo_ = i_d;
shift_en_o_ = 1;
shift_en_ = 1;
addr_ = 0;
state_ = state_one;
end
else if (!i_v && o_b) begin // - one & !i_v & o_b => idle
srlo_ <= 'bx;
shift_en_o_ <= 0;
shift_en_ <= 1'bx;
addr_ <= 0;
state_ <= state_one;
srlo_ = 'bx;
shift_en_o_ = 0;
shift_en_ = 1'bx;
addr_ = 0;
state_ = state_one;
end
else if (!i_v && !o_b) begin // - one & !i_v & !o_b => produce
srlo_ <= 'bx;
shift_en_o_ <= 0;
shift_en_ <= 1'bx;
addr_ <= 0;
state_ <= state_empty;
srlo_ = 'bx;
shift_en_o_ = 0;
shift_en_ = 1'bx;
addr_ = 0;
state_ = state_empty;
end
end // case: state_one

Expand All @@ -243,60 +244,60 @@ module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count, maxcount);
// - (full, will not consume)
// - (full here if depth==2)
if (o_b) begin // - full & o_b => idle
srlo_ <= 'bx;
shift_en_o_ <= 0;
shift_en_ <= 0;
addr_ <= addr;
state_ <= state_more;
srlo_ = 'bx;
shift_en_o_ = 0;
shift_en_ = 0;
addr_ = addr;
state_ = state_more;
end
else begin // - full & !o_b => produce
srlo_ <= srl[addr];
shift_en_o_ <= 1;
shift_en_ <= 0;
// addr_ <= addr-1;
// state_ <= state_more;
addr_ <= addr_zero_ ? 0 : addr-1;
state_ <= addr_zero_ ? state_one : state_more;
srlo_ = srl[addr];
shift_en_o_ = 1;
shift_en_ = 0;
// addr_ = addr-1;
// state_ = state_more;
addr_ = addr_zero_ ? 0 : addr-1;
state_ = addr_zero_ ? state_one : state_more;
end
end
else begin // - (mid: neither empty nor full)
if (i_v && o_b) begin // - mid & i_v & o_b => consume
srlo_ <= 'bx;
shift_en_o_ <= 0;
shift_en_ <= 1;
addr_ <= addr+1;
state_ <= state_more;
srlo_ = 'bx;
shift_en_o_ = 0;
shift_en_ = 1;
addr_ = addr+1;
state_ = state_more;
end
else if (i_v && !o_b) begin // - mid & i_v & !o_b => cons+prod
srlo_ <= srl[addr];
shift_en_o_ <= 1;
shift_en_ <= 1;
addr_ <= addr;
state_ <= state_more;
srlo_ = srl[addr];
shift_en_o_ = 1;
shift_en_ = 1;
addr_ = addr;
state_ = state_more;
end
else if (!i_v && o_b) begin // - mid & !i_v & o_b => idle
srlo_ <= 'bx;
shift_en_o_ <= 0;
shift_en_ <= 0;
addr_ <= addr;
state_ <= state_more;
srlo_ = 'bx;
shift_en_o_ = 0;
shift_en_ = 0;
addr_ = addr;
state_ = state_more;
end
else if (!i_v && !o_b) begin // - mid & !i_v & !o_b => produce
srlo_ <= srl[addr];
shift_en_o_ <= 1;
shift_en_ <= 0;
addr_ <= addr_zero_ ? 0 : addr-1;
state_ <= addr_zero_ ? state_one : state_more;
srlo_ = srl[addr];
shift_en_o_ = 1;
shift_en_ = 0;
addr_ = addr_zero_ ? 0 : addr-1;
state_ = addr_zero_ ? state_one : state_more;
end
end // else: !if(addr_full)
end // case: state_more

default: begin
srlo_ <= 'bx;
shift_en_o_ <= 1'bx;
shift_en_ <= 1'bx;
addr_ <= 'bx;
state_ <= 2'bx;
srlo_ = 'bx;
shift_en_o_ = 1'bx;
shift_en_ = 1'bx;
addr_ = 'bx;
state_ = 2'bx;
end // case: default

endcase // case(state)
Expand Down
Loading

0 comments on commit 15a9daa

Please sign in to comment.