Skip to content

Commit 383258b

Browse files
committed
Byte extract lowering: support extracting non-byte-aligned arrays
Alignment to bytes may be true for ANSI C, but isn't the case for our C front-end (which supports arrays of single bits), and not true for the overall framework in general. This required fixing alignment assumptions both in unpack_array_vector (which turns an arbitrary type into an array of bytes) as well as lower_byte_extract_array_vector (which picks an array/vector of arbitrary subtype from an array of bytes as prepared by unpack_array_vector).
1 parent 2a833ca commit 383258b

File tree

2 files changed

+162
-51
lines changed

2 files changed

+162
-51
lines changed

src/solvers/lowering/byte_operators.cpp

Lines changed: 141 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -551,69 +551,121 @@ static exprt unpack_array_vector(
551551
// refine the number of elements to extract in case the element width is known
552552
// and a multiple of bytes; otherwise we will expand the entire array/vector
553553
optionalt<mp_integer> num_elements = src_size;
554-
if(element_bits > 0 && element_bits % bits_per_byte == 0)
554+
if(element_bits > 0)
555555
{
556556
if(!num_elements.has_value())
557557
{
558558
// turn bytes into elements, rounding up
559-
num_elements = (*max_bytes + el_bytes - 1) / el_bytes;
559+
num_elements =
560+
(*max_bytes * bits_per_byte + element_bits - 1) / element_bits;
560561
}
561562

562563
if(offset_bytes.has_value())
563564
{
564565
// compute offset as number of elements
565-
first_element = *offset_bytes / el_bytes;
566-
// insert offset_bytes-many nil bytes into the output array
566+
first_element = *offset_bytes * bits_per_byte / element_bits;
567+
// insert offset_bytes-many zero bytes into the output array
567568
byte_operands.resize(
568-
numeric_cast_v<std::size_t>(*offset_bytes - (*offset_bytes % el_bytes)),
569+
numeric_cast_v<std::size_t>(
570+
(*offset_bytes * bits_per_byte -
571+
((*offset_bytes * bits_per_byte) % element_bits)) /
572+
bits_per_byte),
569573
from_integer(0, bv_typet{bits_per_byte}));
570574
}
571575
}
572576

573577
// the maximum number of bytes is an upper bound in case the size of the
574-
// array/vector is unknown; if element_bits was usable above this will
578+
// array/vector is unknown; if element_bits was non-zero above this will
575579
// have been turned into a number of elements already
576580
if(!num_elements)
577581
num_elements = *max_bytes;
578582

579583
const exprt src_simp = simplify_expr(src, ns);
580584

581-
for(mp_integer i = first_element; i < *num_elements; ++i)
585+
if(element_bits % bits_per_byte == 0)
582586
{
583-
exprt element;
584-
585-
if(
586-
(src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
587-
i < src_simp.operands().size())
587+
for(mp_integer i = first_element; i < *num_elements; ++i)
588588
{
589-
const std::size_t index_int = numeric_cast_v<std::size_t>(i);
590-
element = src_simp.operands()[index_int];
589+
exprt element;
590+
591+
if(
592+
(src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
593+
i < src_simp.operands().size())
594+
{
595+
const std::size_t index_int = numeric_cast_v<std::size_t>(i);
596+
element = src_simp.operands()[index_int];
597+
}
598+
else
599+
{
600+
element = index_exprt(src_simp, from_integer(i, index_type()));
601+
}
602+
603+
// recursively unpack each element so that we eventually just have an
604+
// array of bytes left
605+
606+
const optionalt<mp_integer> element_max_bytes =
607+
max_bytes
608+
? std::min(mp_integer{el_bytes}, *max_bytes - byte_operands.size())
609+
: optionalt<mp_integer>{};
610+
const std::size_t element_max_bytes_int =
611+
element_max_bytes ? numeric_cast_v<std::size_t>(*element_max_bytes)
612+
: el_bytes;
613+
614+
exprt sub = unpack_rec(
615+
element, little_endian, {}, element_max_bytes, bits_per_byte, ns, true);
616+
exprt::operandst sub_operands = instantiate_byte_array(
617+
sub, 0, element_max_bytes_int, bits_per_byte, ns);
618+
byte_operands.insert(
619+
byte_operands.end(), sub_operands.begin(), sub_operands.end());
620+
621+
if(max_bytes && byte_operands.size() >= *max_bytes)
622+
break;
591623
}
592-
else
624+
}
625+
else
626+
{
627+
const std::size_t element_bits_int =
628+
numeric_cast_v<std::size_t>(element_bits);
629+
630+
exprt::operandst elements;
631+
for(mp_integer i = *num_elements - 1; i >= first_element; --i)
593632
{
594-
element = index_exprt(src_simp, from_integer(i, c_index_type()));
633+
if(
634+
(src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
635+
i < src_simp.operands().size())
636+
{
637+
const std::size_t index_int = numeric_cast_v<std::size_t>(i);
638+
elements.push_back(typecast_exprt::conditional_cast(
639+
src_simp.operands()[index_int], bv_typet{element_bits_int}));
640+
}
641+
else
642+
{
643+
elements.push_back(typecast_exprt::conditional_cast(
644+
index_exprt(src_simp, from_integer(i, c_index_type())),
645+
bv_typet{element_bits_int}));
646+
}
595647
}
596648

597-
// recursively unpack each element so that we eventually just have an array
598-
// of bytes left
649+
const std::size_t merged_bit_width = numeric_cast_v<std::size_t>(
650+
(*num_elements - first_element) * element_bits);
651+
if(!little_endian)
652+
std::reverse(elements.begin(), elements.end());
653+
concatenation_exprt merged{std::move(elements), bv_typet{merged_bit_width}};
599654

600-
const optionalt<mp_integer> element_max_bytes =
601-
max_bytes
602-
? std::min(mp_integer{el_bytes}, *max_bytes - byte_operands.size())
603-
: optionalt<mp_integer>{};
604-
const std::size_t element_max_bytes_int =
605-
element_max_bytes ? numeric_cast_v<std::size_t>(*element_max_bytes)
606-
: el_bytes;
655+
exprt merged_as_bytes =
656+
unpack_rec(merged, little_endian, {}, max_bytes, bits_per_byte, ns, true);
607657

608-
exprt sub = unpack_rec(
609-
element, little_endian, {}, element_max_bytes, bits_per_byte, ns, true);
610-
exprt::operandst sub_operands =
611-
instantiate_byte_array(sub, 0, element_max_bytes_int, bits_per_byte, ns);
658+
const std::size_t merged_byte_width =
659+
(merged_bit_width + bits_per_byte - 1) / bits_per_byte;
660+
const std::size_t max_bytes_int =
661+
max_bytes.has_value()
662+
? std::min(numeric_cast_v<std::size_t>(*max_bytes), merged_byte_width)
663+
: merged_byte_width;
664+
665+
exprt::operandst sub_operands = instantiate_byte_array(
666+
merged_as_bytes, 0, max_bytes_int, bits_per_byte, ns);
612667
byte_operands.insert(
613668
byte_operands.end(), sub_operands.begin(), sub_operands.end());
614-
615-
if(max_bytes && byte_operands.size() >= *max_bytes)
616-
break;
617669
}
618670

619671
const std::size_t size = byte_operands.size();
@@ -1072,17 +1124,55 @@ static exprt lower_byte_extract_array_vector(
10721124
operands.reserve(*num_elements);
10731125
for(std::size_t i = 0; i < *num_elements; ++i)
10741126
{
1075-
plus_exprt new_offset(
1076-
unpacked.offset(),
1077-
from_integer(
1078-
i * element_bits / src.get_bits_per_byte(),
1079-
unpacked.offset().type()));
1127+
if(element_bits % src.get_bits_per_byte() == 0)
1128+
{
1129+
plus_exprt new_offset(
1130+
unpacked.offset(),
1131+
from_integer(
1132+
i * element_bits / src.get_bits_per_byte(),
1133+
unpacked.offset().type()));
10801134

1081-
byte_extract_exprt tmp(unpacked);
1082-
tmp.type() = subtype;
1083-
tmp.offset() = new_offset;
1135+
byte_extract_exprt tmp(unpacked);
1136+
tmp.type() = subtype;
1137+
tmp.offset() = new_offset;
10841138

1085-
operands.push_back(lower_byte_extract(tmp, ns));
1139+
operands.push_back(lower_byte_extract(tmp, ns));
1140+
}
1141+
else
1142+
{
1143+
const mp_integer first_index =
1144+
i * element_bits / src.get_bits_per_byte();
1145+
const mp_integer last_index =
1146+
((i + 1) * element_bits + src.get_bits_per_byte() - 1) /
1147+
src.get_bits_per_byte();
1148+
1149+
exprt::operandst to_concat;
1150+
to_concat.reserve(
1151+
numeric_cast_v<std::size_t>(last_index - first_index));
1152+
for(mp_integer j = first_index; j < last_index; ++j)
1153+
{
1154+
to_concat.push_back(index_exprt{
1155+
unpacked.op(),
1156+
plus_exprt{
1157+
unpacked.offset(), from_integer(j, unpacked.offset().type())}});
1158+
}
1159+
1160+
const std::size_t base = numeric_cast_v<std::size_t>(
1161+
(i * element_bits) % src.get_bits_per_byte());
1162+
const std::size_t last =
1163+
numeric_cast_v<std::size_t>(base + element_bits - 1);
1164+
bv_typet concat_type{src.get_bits_per_byte() * to_concat.size()};
1165+
endianness_mapt endianness_map(
1166+
concat_type, src.id() == ID_byte_extract_little_endian, ns);
1167+
const auto bounds = map_bounds(endianness_map, base, last);
1168+
extractbits_exprt element{
1169+
concatenation_exprt{to_concat, std::move(concat_type)},
1170+
from_integer(bounds.ub, size_type()),
1171+
from_integer(bounds.lb, size_type()),
1172+
subtype};
1173+
1174+
operands.push_back(element);
1175+
}
10861176
}
10871177

10881178
exprt result;
@@ -1106,6 +1196,10 @@ static exprt lower_byte_extract_array_vector(
11061196
std::to_string(array_comprehension_index_counter),
11071197
c_index_type()};
11081198

1199+
PRECONDITION_WITH_DIAGNOSTICS(
1200+
element_bits % src.get_bits_per_byte() == 0,
1201+
"byte extracting non-byte-aligned arrays requires constant size");
1202+
11091203
plus_exprt new_offset{
11101204
unpacked.offset(),
11111205
typecast_exprt::conditional_cast(
@@ -1251,17 +1345,13 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
12511345
{
12521346
const typet &subtype = to_type_with_subtype(src.type()).subtype();
12531347

1254-
// consider ways of dealing with arrays of unknown subtype size or with a
1255-
// subtype size that does not fit byte boundaries; currently we fall back to
1256-
// stitching together consecutive elements down below
12571348
auto element_bits = pointer_offset_bits(subtype, ns);
1258-
if(
1259-
element_bits.has_value() && *element_bits >= 1 &&
1260-
*element_bits % src.get_bits_per_byte() == 0)
1261-
{
1262-
return lower_byte_extract_array_vector(
1263-
src, unpacked, subtype, *element_bits, ns);
1264-
}
1349+
PRECONDITION_WITH_DIAGNOSTICS(
1350+
element_bits.has_value() && *element_bits >= 1,
1351+
"byte extracting arrays of unknown/zero subtype is not yet implemented");
1352+
1353+
return lower_byte_extract_array_vector(
1354+
src, unpacked, subtype, *element_bits, ns);
12651355
}
12661356
else if(src.type().id() == ID_complex)
12671357
{

unit/solvers/lowering/byte_operators.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,16 @@ TEST_CASE("byte extract and bits", "[core][solvers][lowering][byte_extract]")
6464
bit_array_type};
6565
const exprt lower_be1 = lower_byte_extract(be1, ns);
6666
REQUIRE(lower_be1 == *array_of_bits);
67+
68+
const byte_extract_exprt be2{
69+
little_endian ? ID_byte_extract_little_endian
70+
: ID_byte_extract_big_endian,
71+
*array_of_bits,
72+
from_integer(0, index_type()),
73+
config.ansi_c.char_width,
74+
u16};
75+
const exprt lower_be2 = lower_byte_extract(be2, ns);
76+
REQUIRE(lower_be2 == sixteen_bits);
6777
}
6878

6979
GIVEN("Big endian")
@@ -91,6 +101,16 @@ TEST_CASE("byte extract and bits", "[core][solvers][lowering][byte_extract]")
91101
bit_array_type};
92102
const exprt lower_be1 = lower_byte_extract(be1, ns);
93103
REQUIRE(lower_be1 == *array_of_bits);
104+
105+
const byte_extract_exprt be2{
106+
little_endian ? ID_byte_extract_little_endian
107+
: ID_byte_extract_big_endian,
108+
*array_of_bits,
109+
from_integer(0, index_type()),
110+
config.ansi_c.char_width,
111+
u16};
112+
const exprt lower_be2 = lower_byte_extract(be2, ns);
113+
REQUIRE(lower_be2 == sixteen_bits);
94114
}
95115
}
96116

@@ -272,6 +292,7 @@ SCENARIO("byte_extract_lowering", "[core][solvers][lowering][byte_extract]")
272292
union_typet({{"compA", u32}, {"compB", u64}}),
273293
c_enum_typet(u16),
274294
c_enum_typet(unsignedbv_typet(128)),
295+
array_typet{bv_typet{1}, from_integer(128, size_type())},
275296
array_typet(u8, size),
276297
array_typet(s32, size),
277298
array_typet(u64, size),

0 commit comments

Comments
 (0)