From a9630472be39ae5c5567b109363e82d634d76a0d Mon Sep 17 00:00:00 2001
From: maximzubkov <zubkov.md@phystech.edu>
Date: Sat, 29 May 2021 14:42:53 +0300
Subject: [PATCH 1/3] Fix CSV parsing in astminer to code2seq conversion

---
 code2seq/preprocessing/astminer_to_code2seq.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/code2seq/preprocessing/astminer_to_code2seq.py b/code2seq/preprocessing/astminer_to_code2seq.py
index 790440e..77d669f 100644
--- a/code2seq/preprocessing/astminer_to_code2seq.py
+++ b/code2seq/preprocessing/astminer_to_code2seq.py
@@ -10,7 +10,10 @@
 
 
 def _get_id2value_from_csv(path_: str) -> Dict[str, str]:
-    return dict(numpy.genfromtxt(path_, delimiter=",", dtype=(str, str))[1:])
+    with open(path_, "r") as f:
+        lines = f.read().strip().split("\n")[1:]
+        lines = [line.split(",", maxsplit=1) for line in lines]
+        return {k: v for k, v in lines}
 
 
 def preprocess_csv(data_folder: str, dataset_name: str, holdout_name: str, is_shuffled: bool):

From 1c79985757f218c3da36cf9fbf52e07a989d3d92 Mon Sep 17 00:00:00 2001
From: maximzubkov <zubkov.md@phystech.edu>
Date: Sat, 29 May 2021 14:43:54 +0300
Subject: [PATCH 2/3] Fix path to astminer jar

---
 scripts/download_data.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/download_data.sh b/scripts/download_data.sh
index 699a435..560886d 100755
--- a/scripts/download_data.sh
+++ b/scripts/download_data.sh
@@ -9,7 +9,7 @@ LOAD_SPLITTED=false
 DATA_DIR=./data
 POJ_DOWNLOAD_SCRIPT=./scripts/download_poj.sh
 CODEFORCES_DOWNLOAD_SCRIPT=./scripts/download_codeforces.sh
-ASTMINER_PATH=../astminer/build/shadow/lib-0.*.jar
+ASTMINER_PATH=../astminer/build/shadow/astminer.jar
 SPLIT_SCRIPT=./scripts/split_dataset.sh
 
 function is_int(){

From caf3fc3abcb761a0b42cef6d37ab79013c15f933 Mon Sep 17 00:00:00 2001
From: maximzubkov <zubkov.md@phystech.edu>
Date: Sat, 29 May 2021 14:50:13 +0300
Subject: [PATCH 3/3] Fix mypy issues

---
 code2seq/preprocessing/astminer_to_code2seq.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/code2seq/preprocessing/astminer_to_code2seq.py b/code2seq/preprocessing/astminer_to_code2seq.py
index 77d669f..4d478b1 100644
--- a/code2seq/preprocessing/astminer_to_code2seq.py
+++ b/code2seq/preprocessing/astminer_to_code2seq.py
@@ -12,8 +12,8 @@
 def _get_id2value_from_csv(path_: str) -> Dict[str, str]:
     with open(path_, "r") as f:
         lines = f.read().strip().split("\n")[1:]
-        lines = [line.split(",", maxsplit=1) for line in lines]
-        return {k: v for k, v in lines}
+        parsed_lines = [line.split(",", maxsplit=1) for line in lines]
+        return {k: v for k, v in parsed_lines}
 
 
 def preprocess_csv(data_folder: str, dataset_name: str, holdout_name: str, is_shuffled: bool):