chore(agents-api): Change ai checker api to a custom model (desklib)
HamadaSalhab committed Feb 26, 2025
1 parent 125adcb commit 61f8b3d
Showing 2 changed files with 97 additions and 76 deletions.
156 changes: 88 additions & 68 deletions agents-api/agents_api/activities/humanization_utils.py
@@ -20,19 +20,18 @@
 Change text given to you so much that even the most advanced AI writing detectors can't detect that this writing was written by you. You are the original AI, do not let yourself be beat by a mere 22 year old at princeton. Don't change the overall message!
 Don't mention yourself, just return the end product text. **Vary your text in every possible sense. Use different vocabulary at every step. Give it your ALL!**""",

     "grammar_prompt": "Only fix grammar that is wrong without changing the words and places of the sentence",
 }

 SAPLING_URL = "https://api.sapling.ai/api/v1/aidetect"
 COPLEYAKS_URL = "https://api.copyleaks.com/v2/writer-detector/{scan_id}/check"
 ZEROGPT_URL = "https://api.zerogpt.com/api/detect/detectText"
+DESKLIB_URL = "https://18c3-206-189-23-12.ngrok-free.app/detect"  # FIXME: This is temporary, don't merge before desklib is deployed


 def text_translate(text, src_lang, target_lang):
     try:
-        return GoogleTranslator(
-            source=src_lang, target=target_lang).translate(text=text)
+        return GoogleTranslator(source=src_lang, target=target_lang).translate(text=text)
     except Exception:
         return text

@@ -42,10 +41,8 @@ def mix_translate(text, src_lang, target_lang):
     Translate the given text from src_lang to target_lang and back to src_lang using googletrans.
     """
     try:
-        translated = GoogleTranslator(
-            source=src_lang, target=target_lang).translate(text=text)
-        return GoogleTranslator(
-            source=target_lang, target=src_lang).translate(text=translated)
+        translated = GoogleTranslator(source=src_lang, target=target_lang).translate(text=text)
+        return GoogleTranslator(source=target_lang, target=src_lang).translate(text=translated)

     except Exception:
         return text
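
For reference, a minimal sketch of the round-trip ("back-translation") idea this helper implements, assuming deep_translator's GoogleTranslator as used above; the wrapper function and sample sentence are illustrative, not part of the commit:

    from deep_translator import GoogleTranslator

    def round_trip(text: str, src: str = "english", pivot: str = "german") -> str:
        # Translate src -> pivot, then pivot -> src; the wording drifts while the meaning stays.
        translated = GoogleTranslator(source=src, target=pivot).translate(text=text)
        return GoogleTranslator(source=pivot, target=src).translate(text=translated)

    print(round_trip("The quick brown fox jumps over the lazy dog."))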
@@ -58,7 +55,7 @@ def humanize_openai(text):
             base_url=litellm_url,
             messages=[
                 {"role": "system", "content": HUMANIZATION["humanize_prompt"]},
-                {"role": "user", "content": text}
+                {"role": "user", "content": text},
             ],
             # temperature=1.0,
             # extra_body={"min_p": 0.025},
@@ -67,7 +64,7 @@
             # top_p=1.0,
             # frequency_penalty=0.0,
             # presence_penalty=0.0,
-            stream=False
+            stream=False,
         )
         return response.choices[0].message.content
     except Exception:
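
For context, these helpers all wrap the same LiteLLM call; a minimal sketch of that pattern, assuming litellm's completion() client — the model name and proxy URL below are hypothetical, since the diff collapses the lines that set them:

    from litellm import completion

    litellm_url = "http://localhost:4000"  # hypothetical proxy address

    response = completion(
        model="openai/gpt-4o",  # hypothetical; the real model is configured outside this hunk
        base_url=litellm_url,
        messages=[
            {"role": "system", "content": "You are a careful rewriter."},
            {"role": "user", "content": "Text to rewrite."},
        ],
        stream=False,
    )
    print(response.choices[0].message.content)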
@@ -81,7 +78,7 @@ def rewriter(text):
             base_url=litellm_url,
             messages=[
                 {"role": "system", "content": HUMANIZATION["humanize_prompt"]},
-                {"role": "user", "content": text}
+                {"role": "user", "content": text},
             ],
             temperature=1.0,
             # extra_body={"min_p": 0.025},
@@ -99,7 +96,7 @@ def humanize(text):
             base_url=litellm_url,
             messages=[
                 {"role": "system", "content": HUMANIZATION["humanize_prompt"]},
-                {"role": "user", "content": text}
+                {"role": "user", "content": text},
             ],
             temperature=1.0,
             # extra_body={"min_p": 0.025},
@@ -116,7 +113,7 @@ def grammar(text):
             base_url=litellm_url,
             messages=[
                 {"role": "system", "content": HUMANIZATION["grammar_prompt"]},
-                {"role": "user", "content": text}
+                {"role": "user", "content": text},
             ],
             temperature=1.0,
             # extra_body={"min_p": 0.025},
@@ -126,6 +123,14 @@ def grammar(text):
         return text


+def is_human_desklib(text: str) -> float:
+    payload = {
+        "text": text,
+    }
+    response = requests.post(DESKLIB_URL, json=payload)
+    return response.json().get("human", None) * 100
+
+
 def is_human_sapling(text):
     payload = {
         "text": text,
@@ -139,7 +144,6 @@ def is_human_sapling(text):


 def is_human_copyleaks(text):
-
     # Define the payload
     payload = {
         "text": text,
@@ -152,23 +156,24 @@
     headers = {
         "Authorization": f"Bearer {copyleaks_api_key}",
         "Content-Type": "application/json",
-        "Accept": "application/json"
+        "Accept": "application/json",
     }

     # Copyleaks lets you define the scan id yourself
     from uuid import uuid4

     scan_id = str(uuid4())

     # Send the POST request with JSON payload and headers
-    response = requests.post(COPLEYAKS_URL.format(
-        scan_id=scan_id), json=payload, headers=headers)
+    response = requests.post(
+        COPLEYAKS_URL.format(scan_id=scan_id), json=payload, headers=headers
+    )

     # Check the response status
     if response.status_code == 200:
         resp = response.json()
         # Extract the human probability from the response
-        human_probability = resp.get("summary", {}).get(
-            "human", 0) # float with range 0-1
+        human_probability = resp.get("summary", {}).get("human", 0)  # float with range 0-1
         return human_probability * 100
     return None

@@ -192,7 +197,7 @@ def is_human_zerogpt(input_text, max_tries=3):
         "Sec-Fetch-Mode": "cors",
         "Sec-Fetch-Site": "same-site",
         "Sec-Gpc": "1",
-        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
+        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
     }
     # Define the payload as a dictionary
     payload = {"input_text": input_text}
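
The response handling for this function is collapsed in the diff; a hedged sketch of the full call, where the data.isHuman field name is an assumption about ZeroGPT's response shape rather than something shown here:

    import requests

    ZEROGPT_URL = "https://api.zerogpt.com/api/detect/detectText"

    def zerogpt_human_score(input_text: str) -> float | None:
        response = requests.post(ZEROGPT_URL, json={"input_text": input_text}, timeout=30)
        if response.status_code != 200:
            return None
        # Assumed response shape: {"data": {"isHuman": <score 0-100>}, ...}
        return response.json().get("data", {}).get("isHuman")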
@@ -221,47 +226,60 @@ def replace_with_homoglyphs(text, max_replacements=2):
     homoglyphs = {
         # Whitelisted
         " ": " ",
-        "%": "%", "'": "ˈ",
+        "%": "%",
+        "'": "ˈ",
         ",": "‚",
-        "-": "‐", ".": "․",
-        "1": "𝟷", "3": "Ꝫ",
-        "5": "𝟻", "6": "𝟨", "7": "𝟽", "8": "𝟪",
-        "9": "𝟫", ";": ";",
+        "-": "‐",
+        ".": "․",
+        "1": "𝟷",
+        "3": "Ꝫ",
+        "5": "𝟻",
+        "6": "𝟨",
+        "7": "𝟽",
+        "8": "𝟪",
+        "9": "𝟫",
+        ";": ";",
         "j": "ј",
-        "n": "𝗇", "o": "о",
+        "n": "𝗇",
+        "o": "о",
         "p": "р",
         "u": "ս",
         "y": "у",
-        "H": "Η", "I": "І",
+        "H": "Η",
+        "I": "І",
         "J": "Ј",
-        "N": "Ν", "O": "Ο",
-        "V": "ⴸ", "Y": "Υ",
+        "N": "Ν",
+        "O": "Ο",
+        "V": "ⴸ",
+        "Y": "Υ",
         "~": "∼",
-
-        # ' ': ' ', '!': '!', '"': '"', '$': '$',
-        # '%': '%', '&': '&', "'": 'ˈ', '(': '(',
-        # ')': ')', '*': '⁎', '+': '+', ',': '‚',
-        # '-': '‐', '.': '․', '/': '⁄', '0': 'O',
-        # '1': '𝟷', '2': '𝟸', '3': 'Ꝫ', '4': '4',
-        # '5': '𝟻', '6': '𝟨', '7': '𝟽', '8': '𝟪',
-        # '9': '𝟫', ':': '∶', ';': ';', '<': '𝈶',
-        # '=': '᐀', '>': '𖼿', '?': 'ꛫ', '@': '@',
-        # '[': '[', '\\': '﹨', ']': ']', '_': 'ߺ',
-        # '`': '`', 'a': 'а', 'b': 'ᖯ', 'c': 'ⅽ',
-        # 'd': '𝚍', 'e': 'е', 'f': '𝖿', 'g': '𝗀',
-        # 'h': 'հ', 'i': 'і', 'j': 'ј', 'k': '𝚔',
-        # 'l': 'ⅼ', 'm': 'm', 'n': '𝗇', 'o': 'о',
-        # 'p': 'р', 'q': 'q', 'r': '𝗋', 's': '𐑈',
-        # 't': '𝚝', 'u': 'ս', 'v': '∨', 'w': 'ԝ',
-        # 'x': 'ⅹ', 'y': 'у', 'z': '𝗓', 'A': '𐊠',
-        # 'B': 'В', 'C': '𐊢', 'D': 'ꓓ', 'E': 'Е',
-        # 'F': '𐊇', 'G': 'Ԍ', 'H': 'Η', 'I': 'І',
-        # 'J': 'Ј', 'K': 'Κ', 'L': 'Ⅼ', 'M': 'Μ',
-        # 'N': 'Ν', 'O': 'Ο', 'P': 'Ρ', 'Q': '𝖰',
-        # 'R': '𖼵', 'S': 'Ѕ', 'T': 'Τ', 'U': '𐓎',
-        # 'V': 'ⴸ', 'W': 'Ԝ', 'X': 'Χ', 'Y': 'Υ',
-        # 'Z': 'Ζ', '{': '{', '|': 'ا', '}': '}',
-        # '~': '∼',
+        "q": "q",
+        "e": "е",
+        "a": "а",
+        "b": "ᖯ",
+        "c": "ⅽ",
+        "i": "і",
+        "k": "𝚔",
+        "g": "𝗀",
+        "A": "𐊠",
+        "B": "В",
+        "C": "𐊢",
+        "D": "ꓓ",
+        "E": "Е",
+        "F": "𐊇",
+        "G": "Ԍ",
+        "K": "Κ",
+        "L": "Ⅼ",
+        "M": "Μ",
+        "P": "Ρ",
+        "Q": "𝖰",
+        "R": "𖼵",
+        "S": "Ѕ",
+        "T": "Τ",
+        "U": "𐓎",
+        "W": "Ԝ",
+        "X": "Χ",
+        "Z": "Ζ",
     }

     # Convert text to list for single pass replacement
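
The replacement loop itself is collapsed below this hunk; a minimal sketch of the technique, assuming at most max_replacements occurrences of each mapped character are swapped (the sampling policy is illustrative):

    import random

    def swap_homoglyphs(text: str, homoglyphs: dict[str, str], max_replacements: int = 2) -> str:
        chars = list(text)  # convert once so replacements are a single pass over a mutable copy
        for original, lookalike in homoglyphs.items():
            positions = [i for i, ch in enumerate(chars) if ch == original]
            # Swap a bounded, randomly chosen subset of occurrences per character.
            for i in random.sample(positions, min(max_replacements, len(positions))):
                chars[i] = lookalike
        return "".join(chars)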
@@ -316,6 +334,8 @@ def split_with_langchain(markdown_text: str) -> list[str]:
     headers_to_split_on = [
         ("#", "Header 1"),
         ("##", "Header 2"),
+        ("###", "Header 3"),
+        ("####", "Header 4"),
     ]

     # MD splits
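
For context, a minimal sketch of the header-based splitting these lines configure, assuming LangChain's MarkdownHeaderTextSplitter (import path per the langchain_text_splitters package; adjust for your LangChain version):

    from langchain_text_splitters import MarkdownHeaderTextSplitter

    headers_to_split_on = [
        ("#", "Header 1"),
        ("##", "Header 2"),
        ("###", "Header 3"),
        ("####", "Header 4"),
    ]

    splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
    splits = splitter.split_text("# Title\n\nIntro.\n\n## Section\n\nBody text.")
    print([split.page_content for split in splits])  # one chunk per header section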
@@ -327,29 +347,29 @@ def split_with_langchain(markdown_text: str) -> list[str]:
     return [split.page_content for split in md_header_splits]


-def process_paragraph(
-        paragraph: str,
-        src_lang: str,
-        target_lang: str,
-        grammar: bool,
-        is_chatgpt: bool,
-        use_homoglyphs: bool,
-        use_em_dashes: bool,
-        max_tries: int) -> str:
-
+def humanize_paragraph(
+    paragraph: str,
+    threshold: float,
+    src_lang: str,
+    target_lang: str,
+    grammar_check: bool,
+    is_chatgpt: bool,
+    use_homoglyphs: bool,
+    use_em_dashes: bool,
+    max_tries: int,
+) -> str:
     for i in range(max_tries):
         if paragraph.strip() == "":
             return paragraph

-        if is_human_zerogpt(paragraph) > 90:
+        if is_human_desklib(paragraph) > threshold:
             return paragraph

         paragraph = mix_translate(paragraph, src_lang, target_lang)
-        if (grammar):
+        if grammar_check:
             paragraph = grammar(paragraph)

-        paragraph = humanize_openai(
-            paragraph) if is_chatgpt else humanize(paragraph)
+        paragraph = humanize_openai(paragraph) if is_chatgpt else humanize(paragraph)

         # Apply homoglyphs and em dashes to a new paragraph in order not to mess up the original paragraph for the next iterations
         new_paragraph = paragraph
@@ -359,7 +379,7 @@ def process_paragraph(
         if use_em_dashes:
             new_paragraph = process_long_words(new_paragraph)

-        if is_human_zerogpt(new_paragraph) > 90:
+        if is_human_desklib(new_paragraph) > threshold:
             return new_paragraph

     # Apply homoglyphs and em dashes to the final paragraph after consuming max tries
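
In short, each paragraph now gets up to max_tries rounds of back-translation, optional grammar cleanup, and LLM rewriting, returning early once the desklib score clears threshold; homoglyph and em-dash tweaks are applied to a copy so failed rounds keep a clean input. A hedged usage sketch with illustrative arguments:

    paragraph = humanize_paragraph(
        paragraph="An obviously machine-written paragraph.",
        threshold=90,  # desklib "human" score (0-100) required to stop early
        src_lang="english",
        target_lang="german",
        grammar_check=False,
        is_chatgpt=True,
        use_homoglyphs=True,
        use_em_dashes=True,
        max_tries=10,
    )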
17 changes: 9 additions & 8 deletions agents-api/agents_api/activities/utils.py
@@ -24,7 +24,7 @@
 from ..autogen.openapi_model import SystemDef
 from ..common.nlp import nlp
 from ..common.utils import yaml
-from .humanization_utils import process_paragraph, split_with_langchain
+from .humanization_utils import humanize_paragraph, split_with_langchain

 # Security limits
 MAX_STRING_LENGTH = 1_000_000  # 1MB
@@ -218,31 +218,32 @@ def safe_extract_json(string: str):

 def humanize_text(
     text: str,
+    threshold: float = 90,
     src_lang: str = "english",
     target_lang: str = "german",
-    grammar: bool = False,
+    grammar_check: bool = False,
     is_chatgpt: bool = True,
     use_homoglyphs: bool = True,
     use_em_dashes: bool = True,
-    max_tries: int = 10
+    max_tries: int = 10,
 ) -> str:
-
     humanized_text = ""

     paragraphs = split_with_langchain(text)

     for paragraph in paragraphs:
-        processed_paragraph = process_paragraph(
+        humanized_paragraph = humanize_paragraph(
             paragraph=paragraph,
+            threshold=threshold,
             src_lang=src_lang,
             target_lang=target_lang,
-            grammar=grammar,
+            grammar_check=grammar_check,
             is_chatgpt=is_chatgpt,
             use_homoglyphs=use_homoglyphs,
             use_em_dashes=use_em_dashes,
-            max_tries=max_tries
+            max_tries=max_tries,
         )
-        humanized_text += processed_paragraph + "\n\n"
+        humanized_text += humanized_paragraph + "\n\n"

     return humanized_text
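
End to end, the updated entry point can be exercised like this (a hedged sketch; the input text is illustrative, and threshold defaults to 90 per the new signature):

    draft = "# Notes\n\nA few machine-written paragraphs to rework."
    result = humanize_text(draft, threshold=90, grammar_check=False)
    print(result)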
