Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JSON output format, closes #39 #41

Merged
merged 7 commits into from
Dec 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,5 @@ indent_style = space
[*.{c,h,txt,cmake}]
indent_size = 4

[configure.ac]
indent_size = 4

[*.{md,yml}]
[*.{md,yml,json}]
indent_size = 2

[{Makefile.am,*.mk}]
indent_style = tab
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ foreach(bin rifiuti rifiuti-vista)
PRIVATE
src/utils.c
src/utils.h
src/utils-conv.c
src/utils-conv.h
)
if(WIN32)
target_sources(${bin}
Expand Down
99 changes: 52 additions & 47 deletions src/rifiuti-vista.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
* Please see LICENSE file for more info.
*/

#include <stdbool.h>
#include <glib/gi18n.h>
#include <glib/gstdio.h>

#include "rifiuti-vista.h"
#include "utils-conv.h"
#include "utils.h"
#ifdef G_OS_WIN32
# include "utils-win.h"
Expand Down Expand Up @@ -92,7 +94,7 @@ _validate_index_file (const char *filename,
pathlen = GUINT32_FROM_LE (pathlen);

/* Header length + strlen in UTF-16 encoding */
expect_sz = VERSION2_FILENAME_OFFSET + pathlen * 2;
expect_sz = VERSION2_FILENAME_OFFSET + pathlen * sizeof(gunichar2);
if (*bufsize != expect_sz)
{
g_debug ("File size = %" G_GSIZE_FORMAT
Expand Down Expand Up @@ -124,22 +126,43 @@ _validate_index_file (const char *filename,

static rbin_struct *
_populate_record_data (void *buf,
uint64_t version,
gboolean erraneous)
gsize bufsize,
uint64_t version)
{
rbin_struct *record;
size_t read;
size_t pathbuf_sz = 0;
void *pathbuf_start = NULL;
bool erraneous = false;

switch (version)
{
case VERSION_VISTA:
// In rare cases, the index file is one byte short of the (fixed)
// 544 bytes used in Vista. When that happens, the file size field
// occupies only 56 bits, not 64 bits as it ought to.
// This 56-bit file size is very likely wrong anyway.
// This was observed when deleting dd.exe from Forensic
// Acquisition Utilities (by George M. Garner Jr)
// on certain localized versions of Vista.
if (bufsize == VERSION1_FILE_SIZE - 1)
erraneous = true;

pathbuf_sz = WIN_PATH_MAX * sizeof(gunichar2);
pathbuf_start = buf - (int)erraneous + VERSION1_FILENAME_OFFSET;
break;

case VERSION_WIN10:
pathbuf_sz = bufsize - VERSION2_FILENAME_OFFSET;
pathbuf_start = buf + VERSION2_FILENAME_OFFSET;
break;

default:
g_assert_not_reached ();
}

record = g_malloc0 (sizeof (rbin_struct));
record->version = version;

/*
* In rare cases, the size of index file is 543 bytes versus (normal) 544 bytes.
* In such occasion file size only occupies 56 bit, not 64 bit as it ought to be.
* Actually this 56-bit file size is very likely wrong after all. Probably some
* bug inside Windows. This is observed during deletion of dd.exe from Forensic
* Acquisition Utilities (by George M. Garner Jr) in certain localized Vista.
*/
memcpy (&record->filesize, buf + FILESIZE_OFFSET,
FILETIME_OFFSET - FILESIZE_OFFSET - (int) erraneous);
if (erraneous)
Expand All @@ -156,34 +179,30 @@ _populate_record_data (void *buf,
}

/* File deletion time */
memcpy (&record->winfiletime, buf + FILETIME_OFFSET - (int) erraneous,
memcpy (&record->winfiletime, buf - (int) erraneous + FILETIME_OFFSET,
VERSION1_FILENAME_OFFSET - FILETIME_OFFSET);
record->winfiletime = GINT64_FROM_LE (record->winfiletime);
record->deltime = win_filetime_to_gdatetime (record->winfiletime);

switch (version)
{
case VERSION_VISTA:
record->uni_path = conv_path_to_utf8_with_tmpl (
(const char *) (buf - erraneous + VERSION1_FILENAME_OFFSET),
NULL, "<\\u%04X>", &read, &record->error);
break;

case VERSION_WIN10:
record->uni_path = conv_path_to_utf8_with_tmpl (
(const char *) (buf + VERSION2_FILENAME_OFFSET),
NULL, "<\\u%04X>", &read, &record->error);
break;
record->raw_uni_path = g_malloc0 (pathbuf_sz + sizeof(gunichar2));
memcpy (record->raw_uni_path, pathbuf_start, pathbuf_sz);

default:
g_assert_not_reached ();
{
// Never set len = -1 for UCS2 source string
char *s = g_convert (record->raw_uni_path,
ucs2_strnlen (record->raw_uni_path, pathbuf_sz) * sizeof (gunichar2),
"UTF-8", "UTF-16LE", NULL, NULL, NULL);
if (s)
{
g_free (s);
}
else
{
g_set_error_literal (&record->error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH,
_("Path contains broken unicode character(s)"));
}
}

if (! record->uni_path)
g_set_error_literal (&record->error, R2_REC_ERROR,
R2_REC_ERROR_CONV_PATH,
_("Trash file path conversion failed completely"));

return record;
}

Expand Down Expand Up @@ -212,21 +231,7 @@ _parse_record_cb (char *index_file,

g_debug ("Start populating record for '%s'...", basename);

switch (version)
{
case VERSION_VISTA:
record = _populate_record_data (buf, version,
(bufsize == VERSION1_FILE_SIZE - 1));
break;

case VERSION_WIN10:
record = _populate_record_data (buf, version, FALSE);
break;

default:
g_assert_not_reached();
}

record = _populate_record_data (buf, bufsize, version);
g_free (buf);

/* Check corresponding $R.... file existence and set record->gone */
Expand Down
80 changes: 53 additions & 27 deletions src/rifiuti.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <glib/gstdio.h>

#include "rifiuti.h"
#include "utils-conv.h"
#include "utils.h"


Expand Down Expand Up @@ -125,7 +126,7 @@ _validate_index_file (const char *filename,

rewind (fp);
*infile = fp;
meta->version = (uint64_t) ver;
meta->version = ver;
return TRUE;

validation_broken:
Expand All @@ -142,13 +143,13 @@ _populate_record_data (void *buf,
{
rbin_struct *record;
uint32_t drivenum;
size_t read;
char *legacy_fname;
size_t uni_buf_sz, null_terminator_offset;

record = g_malloc0 (sizeof (rbin_struct));

legacy_fname = g_malloc0 (RECORD_INDEX_OFFSET - LEGACY_FILENAME_OFFSET);
copy_field (legacy_fname, LEGACY_FILENAME_OFFSET, RECORD_INDEX_OFFSET);
// Verbatim path in ANSI code page
record->raw_legacy_path = g_malloc0 (RECORD_INDEX_OFFSET - LEGACY_FILENAME_OFFSET);
copy_field (record->raw_legacy_path, LEGACY_FILENAME_OFFSET, RECORD_INDEX_OFFSET);

/* Index number associated with the record */
copy_field (&record->index_n, RECORD_INDEX_OFFSET, DRIVE_LETTER_OFFSET);
Expand All @@ -170,10 +171,10 @@ _populate_record_data (void *buf,
record->gone = FILESTATUS_EXISTS;
// If file is not in recycle bin (restored or permanently deleted),
// first byte will be removed from filename
if (!*legacy_fname)
if (! *record->raw_legacy_path)
{
record->gone = FILESTATUS_GONE;
*legacy_fname = record->drive;
*record->raw_legacy_path = record->drive;
}

/* File deletion time */
Expand All @@ -187,30 +188,45 @@ _populate_record_data (void *buf,
record->filesize = GUINT64_FROM_LE (record->filesize);
g_debug ("filesize=%" PRIu64, record->filesize);

/*
* 1. Only bother populating legacy path if users need it,
* because otherwise we don't know which encoding to use
* 2. Enclose with angle brackets because they are not allowed
* in Windows file name, therefore stands out better that
* the escaped hex sequences are not part of real file name
*/
// Only bother checking legacy path when requested,
// because otherwise we don't know which encoding to use
if (legacy_encoding)
{
record->legacy_path = conv_path_to_utf8_with_tmpl (
legacy_fname, legacy_encoding,
"<\\%02X>", &read, &record->error);
char *s = g_convert (record->raw_legacy_path, -1,
"UTF-8", legacy_encoding, NULL, NULL, NULL);
if (s)
g_free (s);
else
g_set_error (&record->error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH,
_("Path contains character(s) that could not be "
"interpreted in %s encoding"), legacy_encoding);
}

g_free (legacy_fname);

if (bufsize == LEGACY_RECORD_SIZE)
return record;

/* Part below deals with unicode path only */

record->uni_path = conv_path_to_utf8_with_tmpl (
(char *) (buf + UNICODE_FILENAME_OFFSET), NULL,
"<\\u%04X>", &read, &record->error);
uni_buf_sz = UNICODE_RECORD_SIZE - UNICODE_FILENAME_OFFSET;
record->raw_uni_path = g_malloc (uni_buf_sz);
copy_field (record->raw_uni_path, UNICODE_FILENAME_OFFSET, UNICODE_RECORD_SIZE);
null_terminator_offset = ucs2_strnlen (
record->raw_uni_path, WIN_PATH_MAX) * sizeof (gunichar2);

{
// Never set len = -1 for wchar source string
char *s = g_convert (record->raw_uni_path, null_terminator_offset,
"UTF-8", "UTF-16LE", NULL, NULL, NULL);
if (s)
{
g_free (s);
}
else
{
g_set_error_literal (&record->error, R2_REC_ERROR, R2_REC_ERROR_CONV_PATH,
_("Path contains broken unicode character(s)"));
}
}

/*
* We check for junk memory filling the padding area after
Expand All @@ -226,22 +242,32 @@ _populate_record_data (void *buf,
* Looks like an ANSI codepage full path is filled in
* legacy path field, then overwritten in place by an 8.3
* version of path whenever applicable (which was always shorter).
*
* The 8.3 path generated from non-ASCII seems to follow a certain
* ruleset, but the exact details are unknown:
* - accented latin chars transliterated to pure ASCII
* - first DBCS char converted to UCS2 codepoint
*/
if (junk_detected && ! *junk_detected)
{
void *ptr;
// Beware: start pos shouldn't be previously read bytes,
// as it may contain invalid seq and quit prematurely.
char *p = record->raw_uni_path + null_terminator_offset;

for (ptr = buf + UNICODE_FILENAME_OFFSET + read;
ptr < buf + UNICODE_RECORD_SIZE; ptr++)
while (p < record->raw_uni_path + uni_buf_sz)
{
if ( *(char *) ptr != '\0' )
if (*p != '\0')
{
g_debug ("Junk detected at offset 0x%tx of unicode path",
ptr - buf - UNICODE_FILENAME_OFFSET);
p - record->raw_uni_path);
*junk_detected = TRUE;
break;
}
p++;
}

if (*junk_detected)
hexdump (record->raw_uni_path, uni_buf_sz);
}

return record;
Expand Down
Loading