From 3e502bf471bb1692ee0623db42e3384501655fc9 Mon Sep 17 00:00:00 2001
From: Yingtong Dou <ytongdou@gmail.com>
Date: Thu, 30 Jul 2020 23:38:30 -0500
Subject: [PATCH] Add HACUD model

---
 README.md                                   | 13 +++++++------
 algorithms/GraphConsis/neigh_samplers.py    |  5 +++++
 algorithms/GraphConsis/supervised_models.py |  5 +++++
 algorithms/GraphSage/README.md              | 18 ++++++++++++++++--
 algorithms/HACUD/README.md                  |  2 +-
 algorithms/HACUD/data_loader.py             |  5 +++++
 algorithms/HACUD/get_data.py                |  5 +++++
 algorithms/HACUD/main.py                    |  5 +++++
 algorithms/HACUD/model.py                   |  5 +++++
 algorithms/HACUD/parse.py                   |  5 +++++
 algorithms/HACUD/utils.py                   |  5 +++++
 reference/hacud.txt                         | 12 ++++++++++++
 setup.py                                    | 12 +++++++-----
 13 files changed, 83 insertions(+), 14 deletions(-)
 create mode 100644 reference/hacud.txt

diff --git a/README.md b/README.md
index 38fa45a..920a763 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@
 
 **Introduction** 
 
-**DGFraud** is a Graph Neural Network (GNN) based toolbox for fraud detection. It integrates the implementation & comparison of state-of-the-art GNN-based fraud detection models. It also includes several utility functions such as graph preprocessing, graph sampling, and performance evaluation. The introduction of implemented models can be found [here](#implemented-models). <!-- (Add introduction blogs links). -->
+**DGFraud** is a Graph Neural Network (GNN) based toolbox for fraud detection. It integrates the implementation & comparison of state-of-the-art GNN-based fraud detection models. The introduction of implemented models can be found [here](#implemented-models). <!-- (Add introduction blogs links). -->
 
 We welcome contributions on adding new fraud detectors and extending the features of the toolbox. Some of the planned features are listed in [TODO list](#todo-list). 
 
@@ -41,6 +41,7 @@ If you use the toolbox in your project, please cite the [paper](https://arxiv.or
 ```
 
 **Useful Resources**
+- [UGFraud: An Unsupervised Graph-based Toolbox for Fraud Detection](https://github.com/safe-graph/UGFraud)
 - [Graph-based Fraud Detection Paper List](https://github.com/safe-graph/graph-fraud-detection-papers) 
 - [Awesome Fraud Detection Papers](https://github.com/benedekrozemberczki/awesome-fraud-detection-papers)
 - [Attack and Defense Papers on Graph Data](https://github.com/safe-graph/graph-adversarial-learning-literature)
@@ -71,7 +72,7 @@ python setup.py install
 * tensorflow>=1.14.0,<2.0
 * numpy>=1.16.4
 * scipy>=1.2.0
-networkx<=1.11
+* networkx<=1.11
 ```
 ## Datasets
 
@@ -144,7 +145,7 @@ The repository is organized as follows:
 | **GEM** | [Heterogeneous Graph Neural Networks for Malicious Account Detection](https://arxiv.org/pdf/2002.12307.pdf)  | CIKM 2018 |[BibTex](https://github.com/safe-graph/DGFraud/blob/master/reference/gem.txt) |
 | **GraphSAGE** | [Inductive Representation Learning on Large Graphs](https://arxiv.org/pdf/1706.02216.pdf)  | NIPS 2017  | [BibTex](https://github.com/safe-graph/DGFraud/blob/master/reference/graphsage.txt) |
 | **GraphConsis** | [Alleviating the Inconsistency Problem of Applying Graph Neural Network to Fraud Detection](https://arxiv.org/pdf/2005.00625.pdf)  | SIGIR 2020  | [BibTex](https://github.com/safe-graph/DGFraud/blob/master/reference/graphconsis.txt) |
-<!--| **HACUD** | [Cash-Out User Detection Based on Attributed Heterogeneous Information Network with a Hierarchical Attention Mechanism](https://aaai.org/ojs/index.php/AAAI/article/view/3884)  | AAAI 2019 |  Bibtex |-->
+| **HACUD** | [Cash-Out User Detection Based on Attributed Heterogeneous Information Network with a Hierarchical Attention Mechanism](https://aaai.org/ojs/index.php/AAAI/article/view/3884)  | AAAI 2019 |  [BibTex](https://github.com/safe-graph/DGFraud/blob/master/reference/hacud.txt) |
 
 
 ## Model Comparison
@@ -158,7 +159,7 @@ The repository is organized as follows:
 | **GEM** | Financial Fraud  | Heterogeneous |GCN |
 | **GraphSAGE** | Opinion Fraud  | Homogeneous   | GraphSAGE |
 | **GraphConsis** | Opinion Fraud  | Heterogeneous   | GraphSAGE |
-<!--| **HACUD** |  |  |   |-->
+| **HACUD** | Financial Fraud | Heterogeneous | GAT |
 
 
 ## TODO List
@@ -170,8 +171,8 @@ The repository is organized as follows:
 - Benchmarking SOTA models
 - Scalable implementation
 - TensorFlow 2.0+ implementation
-- Pytorch version
+- PyTorch implementation
 
 ## How to Contribute
-You are welcomed to contribute to this open-source toolbox. The detailed instructions will be released soon. Currently, you can create issues or send email to [ytongdou@gmail.com](mailto:ytongdou@gmail.com) for inquiry.
+You are welcome to contribute to this open-source toolbox. The detailed instructions will be released soon. Currently, you can create issues or send an email to [bdscsafegraph@gmail.com](mailto:bdscsafegraph@gmail.com) for inquiries.
 
diff --git a/algorithms/GraphConsis/neigh_samplers.py b/algorithms/GraphConsis/neigh_samplers.py
index 7ab5cac..8408b9d 100644
--- a/algorithms/GraphConsis/neigh_samplers.py
+++ b/algorithms/GraphConsis/neigh_samplers.py
@@ -1,3 +1,8 @@
+'''
+This code is due to Zhiwei Liu (@JimLiu96) and UIC BDSC Lab
+DGFraud (A Deep Graph-based Toolbox for Fraud Detection)
+https://github.com/safe-graph/DGFraud
+'''
 from __future__ import division
 from __future__ import print_function
 
diff --git a/algorithms/GraphConsis/supervised_models.py b/algorithms/GraphConsis/supervised_models.py
index de48a2d..aafc3ad 100644
--- a/algorithms/GraphConsis/supervised_models.py
+++ b/algorithms/GraphConsis/supervised_models.py
@@ -1,3 +1,8 @@
+'''
+This code is due to Zhiwei Liu (@JimLiu96) and UIC BDSC Lab
+DGFraud (A Deep Graph-based Toolbox for Fraud Detection)
+https://github.com/safe-graph/DGFraud
+'''
 import tensorflow as tf
 import models as models
 import layers as layers
diff --git a/algorithms/GraphSage/README.md b/algorithms/GraphSage/README.md
index a26e7df..4362e8f 100644
--- a/algorithms/GraphSage/README.md
+++ b/algorithms/GraphSage/README.md
@@ -1,4 +1,18 @@
-# Description
+# GraphSAGE
+
+## Paper
+The GraphSAGE model is proposed by the [paper](http://papers.nips.cc/paper/6703-inductive-representation-learning-on-large-graphs.pdf) below:
+```bibtex
+@inproceedings{hamilton2017inductive,
+  title={Inductive representation learning on large graphs},
+  author={Hamilton, Will and Ying, Zhitao and Leskovec, Jure},
+  booktitle={Advances in neural information processing systems},
+  pages={1024--1034},
+  year={2017}
+}
+```
+
+# Brief Introduction
 We revise the original code of [graphsage](https://github.com/williamleif/GraphSAGE/tree/master/graphsage) so that it can load our data format and train the model.
 
 # Run the code
@@ -10,4 +24,4 @@ line 28 in `utils.py` file
 ```python
 rownetworks = [data['net_rur']]
 ```
-- Before running the code, please remember unzip the given dataset. 
+- Before running the code, please remember to unzip the given YelpChi dataset.
diff --git a/algorithms/HACUD/README.md b/algorithms/HACUD/README.md
index 1ceb8ca..26a675d 100644
--- a/algorithms/HACUD/README.md
+++ b/algorithms/HACUD/README.md
@@ -4,7 +4,7 @@
 
 The HACUD model is proposed by the [paper](https://aaai.org/ojs/index.php/AAAI/article/view/3884) below:
 
-```
+```bibtex
 @inproceedings{DBLP:conf/aaai/HuZSZLQ19,
   author    = {Binbin Hu and
                Zhiqiang Zhang and
diff --git a/algorithms/HACUD/data_loader.py b/algorithms/HACUD/data_loader.py
index fa7e9d8..9937e94 100644
--- a/algorithms/HACUD/data_loader.py
+++ b/algorithms/HACUD/data_loader.py
@@ -1,3 +1,8 @@
+'''
+This code is due to Hengrui Zhang (@hengruizhang98) and UIC BDSC Lab
+DGFraud (A Deep Graph-based Toolbox for Fraud Detection)
+https://github.com/safe-graph/DGFraud
+'''
 import numpy as np
 from sklearn.model_selection import train_test_split
 import scipy.io as sio
diff --git a/algorithms/HACUD/get_data.py b/algorithms/HACUD/get_data.py
index b8eb00f..775ac6e 100755
--- a/algorithms/HACUD/get_data.py
+++ b/algorithms/HACUD/get_data.py
@@ -1,3 +1,8 @@
+'''
+This code is due to Hengrui Zhang (@hengruizhang98) and UIC BDSC Lab
+DGFraud (A Deep Graph-based Toolbox for Fraud Detection)
+https://github.com/safe-graph/DGFraud
+'''
 import numpy as np
 import random as rd
 import scipy.sparse as sp
diff --git a/algorithms/HACUD/main.py b/algorithms/HACUD/main.py
index b335adf..878473e 100755
--- a/algorithms/HACUD/main.py
+++ b/algorithms/HACUD/main.py
@@ -1,3 +1,8 @@
+'''
+This code is due to Hengrui Zhang (@hengruizhang98) and UIC BDSC Lab
+DGFraud (A Deep Graph-based Toolbox for Fraud Detection)
+https://github.com/safe-graph/DGFraud
+'''
 import numpy as np
 import pandas as pd 
 import os
diff --git a/algorithms/HACUD/model.py b/algorithms/HACUD/model.py
index 6fdbbb1..df1ce79 100755
--- a/algorithms/HACUD/model.py
+++ b/algorithms/HACUD/model.py
@@ -1,3 +1,8 @@
+'''
+This code is due to Hengrui Zhang (@hengruizhang98) and UIC BDSC Lab
+DGFraud (A Deep Graph-based Toolbox for Fraud Detection)
+https://github.com/safe-graph/DGFraud
+'''
 import tensorflow as tf
 import os
 import sys
diff --git a/algorithms/HACUD/parse.py b/algorithms/HACUD/parse.py
index e144290..a6250d3 100755
--- a/algorithms/HACUD/parse.py
+++ b/algorithms/HACUD/parse.py
@@ -1,3 +1,8 @@
+'''
+This code is due to Hengrui Zhang (@hengruizhang98) and UIC BDSC Lab
+DGFraud (A Deep Graph-based Toolbox for Fraud Detection)
+https://github.com/safe-graph/DGFraud
+'''
 import argparse
 
 def parse_args():
diff --git a/algorithms/HACUD/utils.py b/algorithms/HACUD/utils.py
index 31e27c6..4c6abae 100644
--- a/algorithms/HACUD/utils.py
+++ b/algorithms/HACUD/utils.py
@@ -1,3 +1,8 @@
+'''
+This code is due to Hengrui Zhang (@hengruizhang98) and UIC BDSC Lab
+DGFraud (A Deep Graph-based Toolbox for Fraud Detection)
+https://github.com/safe-graph/DGFraud
+'''
 import random
 import scipy.io as sio
 import scipy.sparse as sp
diff --git a/reference/hacud.txt b/reference/hacud.txt
new file mode 100644
index 0000000..780cd5f
--- /dev/null
+++ b/reference/hacud.txt
@@ -0,0 +1,12 @@
+@inproceedings{DBLP:conf/aaai/HuZSZLQ19,
+  author    = {Binbin Hu and
+               Zhiqiang Zhang and
+               Chuan Shi and
+               Jun Zhou and
+               Xiaolong Li and
+               Yuan Qi},
+  title     = {Cash-Out User Detection Based on Attributed Heterogeneous Information
+               Network with a Hierarchical Attention Mechanism},
+  booktitle = {The Thirty-Third AAAI Conference on Artificial Intelligence},
+  year      = {2019}
+}
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 252727c..df67adf 100644
--- a/setup.py
+++ b/setup.py
@@ -12,9 +12,10 @@
     requirements = f.read().splitlines()
 
 setup(name='DGFraud',
-      author="Yutong Deng, Yingtong Dou and UIC BDSC Lab",
-      author_email="ytongdou@gmail.com",
-      description='a GNN based toolbox for fraud detection in Tensorflow',
+      version="0.1.0",
+      author="Yutong Deng, Yingtong Dou, Hengrui Zhang, and UIC BDSC Lab",
+      author_email="bdscsafegraph@gmail.com",
+      description='a GNN-based toolbox for fraud detection in Tensorflow',
       long_description=open("README.md", "r", encoding="utf-8").read(),
       long_description_content_type="text/markdown",
       url='https://github.com/safe-graph/DGFraud',
@@ -24,13 +25,14 @@
       install_requires=['numpy>=1.16.4',
                         'tensorflow>=1.14.0,<2.0',
                         'scipy>=1.2.1',
-                        'scikit_learn>=0.21rc2'
+                        'scikit_learn>=0.21rc2',
+                        'networkx<=1.11'
                         ],
       packages=find_packages(exclude=['test']),
       include_package_data=True,
       setup_requires=['setuptools>=38.6.0'],
       classifiers=[
-          'Development Status :: 3 - Alpha',
+          'Development Status :: 4 - Beta',
           'Intended Audience :: Education',
           'Intended Audience :: Financial and Insurance Industry',
           'Intended Audience :: Science/Research',