Skip to content

Commit b267346

Browse files
committed
Byte extract lowering: support extracting non-byte-aligned arrays
Alignment to bytes may be true for ANSI C, but isn't the case for our C front-end (which supports arrays of single bits), and not true for the overall framework in general. This required fixing alignment assumptions both in unpack_array_vector (which turns an arbitrary type into an array of bytes) as well as lower_byte_extract_array_vector (which picks an array/vector of arbitrary subtype from an array of bytes as prepared by unpack_array_vector).
1 parent 280c309 commit b267346

File tree

2 files changed

+149
-46
lines changed

2 files changed

+149
-46
lines changed

src/solvers/lowering/byte_operators.cpp

Lines changed: 132 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -508,69 +508,117 @@ static exprt unpack_array_vector(
508508
// refine the number of elements to extract in case the element width is known
509509
// and a multiple of bytes; otherwise we will expand the entire array/vector
510510
optionalt<mp_integer> num_elements = src_size;
511-
if(element_bits > 0 && element_bits % 8 == 0)
511+
if(element_bits > 0)
512512
{
513513
if(!num_elements.has_value())
514514
{
515515
// turn bytes into elements, rounding up
516-
num_elements = (*max_bytes + el_bytes - 1) / el_bytes;
516+
num_elements = (*max_bytes * 8 + element_bits - 1) / element_bits;
517517
}
518518

519519
if(offset_bytes.has_value())
520520
{
521521
// compute offset as number of elements
522-
first_element = *offset_bytes / el_bytes;
523-
// insert offset_bytes-many nil bytes into the output array
522+
first_element = *offset_bytes * 8 / element_bits;
523+
// insert offset_bytes-many zero bytes into the output array
524524
byte_operands.resize(
525-
numeric_cast_v<std::size_t>(*offset_bytes - (*offset_bytes % el_bytes)),
525+
numeric_cast_v<std::size_t>(
526+
(*offset_bytes * 8 - ((*offset_bytes * 8) % element_bits)) / 8),
526527
from_integer(0, bv_typet{8}));
527528
}
528529
}
529530

530531
// the maximum number of bytes is an upper bound in case the size of the
531-
// array/vector is unknown; if element_bits was usable above this will
532+
// array/vector is unknown; if element_bits was non-zero above this will
532533
// have been turned into a number of elements already
533534
if(!num_elements)
534535
num_elements = *max_bytes;
535536

536537
const exprt src_simp = simplify_expr(src, ns);
537538

538-
for(mp_integer i = first_element; i < *num_elements; ++i)
539+
if(element_bits % 8 == 0)
539540
{
540-
exprt element;
541-
542-
if(
543-
(src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
544-
i < src_simp.operands().size())
541+
for(mp_integer i = first_element; i < *num_elements; ++i)
545542
{
546-
const std::size_t index_int = numeric_cast_v<std::size_t>(i);
547-
element = src_simp.operands()[index_int];
543+
exprt element;
544+
545+
if(
546+
(src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
547+
i < src_simp.operands().size())
548+
{
549+
const std::size_t index_int = numeric_cast_v<std::size_t>(i);
550+
element = src_simp.operands()[index_int];
551+
}
552+
else
553+
{
554+
element = index_exprt(src_simp, from_integer(i, index_type()));
555+
}
556+
557+
// recursively unpack each element so that we eventually just have an array
558+
// of bytes left
559+
560+
const optionalt<mp_integer> element_max_bytes =
561+
max_bytes
562+
? std::min(mp_integer{el_bytes}, *max_bytes - byte_operands.size())
563+
: optionalt<mp_integer>{};
564+
const std::size_t element_max_bytes_int =
565+
element_max_bytes ? numeric_cast_v<std::size_t>(*element_max_bytes)
566+
: el_bytes;
567+
568+
exprt sub =
569+
unpack_rec(element, little_endian, {}, element_max_bytes, ns, true);
570+
exprt::operandst sub_operands =
571+
instantiate_byte_array(sub, 0, element_max_bytes_int, ns);
572+
byte_operands.insert(
573+
byte_operands.end(), sub_operands.begin(), sub_operands.end());
574+
575+
if(max_bytes && byte_operands.size() >= *max_bytes)
576+
break;
548577
}
549-
else
578+
}
579+
else
580+
{
581+
const std::size_t element_bits_int =
582+
numeric_cast_v<std::size_t>(element_bits);
583+
584+
exprt::operandst elements;
585+
for(mp_integer i = *num_elements - 1; i >= first_element; --i)
550586
{
551-
element = index_exprt(src_simp, from_integer(i, index_type()));
587+
if(
588+
(src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
589+
i < src_simp.operands().size())
590+
{
591+
const std::size_t index_int = numeric_cast_v<std::size_t>(i);
592+
elements.push_back(typecast_exprt::conditional_cast(
593+
src_simp.operands()[index_int], bv_typet{element_bits_int}));
594+
}
595+
else
596+
{
597+
elements.push_back(typecast_exprt::conditional_cast(
598+
index_exprt(src_simp, from_integer(i, index_type())),
599+
bv_typet{element_bits_int}));
600+
}
552601
}
553602

554-
// recursively unpack each element so that we eventually just have an array
555-
// of bytes left
603+
const std::size_t merged_bit_width = numeric_cast_v<std::size_t>(
604+
(*num_elements - first_element) * element_bits);
605+
if(!little_endian)
606+
std::reverse(elements.begin(), elements.end());
607+
concatenation_exprt merged{std::move(elements), bv_typet{merged_bit_width}};
556608

557-
const optionalt<mp_integer> element_max_bytes =
558-
max_bytes
559-
? std::min(mp_integer{el_bytes}, *max_bytes - byte_operands.size())
560-
: optionalt<mp_integer>{};
561-
const std::size_t element_max_bytes_int =
562-
element_max_bytes ? numeric_cast_v<std::size_t>(*element_max_bytes)
563-
: el_bytes;
609+
exprt merged_as_bytes =
610+
unpack_rec(merged, little_endian, {}, max_bytes, ns, true);
611+
612+
const std::size_t merged_byte_width = (merged_bit_width + 7) / 8;
613+
const std::size_t max_bytes_int =
614+
max_bytes.has_value()
615+
? std::min(numeric_cast_v<std::size_t>(*max_bytes), merged_byte_width)
616+
: merged_byte_width;
564617

565-
exprt sub =
566-
unpack_rec(element, little_endian, {}, element_max_bytes, ns, true);
567618
exprt::operandst sub_operands =
568-
instantiate_byte_array(sub, 0, element_max_bytes_int, ns);
619+
instantiate_byte_array(merged_as_bytes, 0, max_bytes_int, ns);
569620
byte_operands.insert(
570621
byte_operands.end(), sub_operands.begin(), sub_operands.end());
571-
572-
if(max_bytes && byte_operands.size() >= *max_bytes)
573-
break;
574622
}
575623

576624
const std::size_t size = byte_operands.size();
@@ -973,15 +1021,51 @@ static exprt lower_byte_extract_array_vector(
9731021
operands.reserve(*num_elements);
9741022
for(std::size_t i = 0; i < *num_elements; ++i)
9751023
{
976-
plus_exprt new_offset(
977-
unpacked.offset(),
978-
from_integer(i * element_bits / 8, unpacked.offset().type()));
1024+
if(element_bits % 8 == 0)
1025+
{
1026+
plus_exprt new_offset(
1027+
unpacked.offset(),
1028+
from_integer(i * element_bits / 8, unpacked.offset().type()));
9791029

980-
byte_extract_exprt tmp(unpacked);
981-
tmp.type() = subtype;
982-
tmp.offset() = new_offset;
1030+
byte_extract_exprt tmp(unpacked);
1031+
tmp.type() = subtype;
1032+
tmp.offset() = new_offset;
9831033

984-
operands.push_back(lower_byte_extract(tmp, ns));
1034+
operands.push_back(lower_byte_extract(tmp, ns));
1035+
}
1036+
else
1037+
{
1038+
const mp_integer first_index = i * element_bits / 8;
1039+
const mp_integer last_index = ((i + 1) * element_bits + 7) / 8;
1040+
1041+
exprt::operandst to_concat;
1042+
to_concat.reserve(
1043+
numeric_cast_v<std::size_t>(last_index - first_index));
1044+
for(mp_integer j = first_index; j < last_index; ++j)
1045+
{
1046+
to_concat.push_back(
1047+
index_exprt{unpacked.op(),
1048+
plus_exprt{unpacked.offset(),
1049+
from_integer(j, unpacked.offset().type())}});
1050+
}
1051+
1052+
const std::size_t base =
1053+
numeric_cast_v<std::size_t>((i * element_bits) % 8);
1054+
const std::size_t last =
1055+
numeric_cast_v<std::size_t>(base + element_bits - 1);
1056+
endianness_mapt endianness_map(
1057+
bv_typet{8 * to_concat.size()},
1058+
src.id() == ID_byte_extract_little_endian,
1059+
ns);
1060+
const auto bounds = map_bounds(endianness_map, base, last);
1061+
extractbits_exprt element{
1062+
concatenation_exprt{to_concat, bv_typet{8 * to_concat.size()}},
1063+
from_integer(bounds.ub, size_type()),
1064+
from_integer(bounds.lb, size_type()),
1065+
subtype};
1066+
1067+
operands.push_back(element);
1068+
}
9851069
}
9861070

9871071
exprt result;
@@ -1005,6 +1089,10 @@ static exprt lower_byte_extract_array_vector(
10051089
std::to_string(array_comprehension_index_counter),
10061090
index_type()};
10071091

1092+
PRECONDITION_WITH_DIAGNOSTICS(
1093+
element_bits % 8 == 0,
1094+
"byte extracting non-byte-aligned arrays requires constant size");
1095+
10081096
plus_exprt new_offset{
10091097
unpacked.offset(),
10101098
typecast_exprt::conditional_cast(
@@ -1142,15 +1230,13 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
11421230
{
11431231
const typet &subtype = to_type_with_subtype(src.type()).subtype();
11441232

1145-
// consider ways of dealing with arrays of unknown subtype size or with a
1146-
// subtype size that does not fit byte boundaries; currently we fall back to
1147-
// stitching together consecutive elements down below
11481233
auto element_bits = pointer_offset_bits(subtype, ns);
1149-
if(element_bits.has_value() && *element_bits >= 1 && *element_bits % 8 == 0)
1150-
{
1151-
return lower_byte_extract_array_vector(
1152-
src, unpacked, subtype, *element_bits, ns);
1153-
}
1234+
PRECONDITION_WITH_DIAGNOSTICS(
1235+
element_bits.has_value() && *element_bits >= 1,
1236+
"byte extracting arrays of unknown/zero subtype is not yet implemented");
1237+
1238+
return lower_byte_extract_array_vector(
1239+
src, unpacked, subtype, *element_bits, ns);
11541240
}
11551241
else if(src.type().id() == ID_complex)
11561242
{

unit/solvers/lowering/byte_operators.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ TEST_CASE("byte extract and bits", "[core][solvers][lowering][byte_extract]")
6262
bit_array_type};
6363
const exprt lower_be1 = lower_byte_extract(be1, ns);
6464
REQUIRE(lower_be1 == *array_of_bits);
65+
66+
const byte_extract_exprt be2{little_endian ? ID_byte_extract_little_endian
67+
: ID_byte_extract_big_endian,
68+
*array_of_bits,
69+
from_integer(0, index_type()),
70+
u16};
71+
const exprt lower_be2 = lower_byte_extract(be2, ns);
72+
REQUIRE(lower_be2 == sixteen_bits);
6573
}
6674

6775
GIVEN("Big endian")
@@ -87,6 +95,14 @@ TEST_CASE("byte extract and bits", "[core][solvers][lowering][byte_extract]")
8795
bit_array_type};
8896
const exprt lower_be1 = lower_byte_extract(be1, ns);
8997
REQUIRE(lower_be1 == *array_of_bits);
98+
99+
const byte_extract_exprt be2{little_endian ? ID_byte_extract_little_endian
100+
: ID_byte_extract_big_endian,
101+
*array_of_bits,
102+
from_integer(0, index_type()),
103+
u16};
104+
const exprt lower_be2 = lower_byte_extract(be2, ns);
105+
REQUIRE(lower_be2 == sixteen_bits);
90106
}
91107
}
92108

@@ -262,6 +278,7 @@ SCENARIO("byte_extract_lowering", "[core][solvers][lowering][byte_extract]")
262278
union_typet({{"compA", u32}, {"compB", u64}}),
263279
c_enum_typet(u16),
264280
c_enum_typet(unsignedbv_typet(128)),
281+
array_typet{bv_typet{1}, from_integer(128, size_type())},
265282
array_typet(u8, size),
266283
array_typet(s32, size),
267284
array_typet(u64, size),

0 commit comments

Comments
 (0)