Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vyokky/dev #130

Merged
merged 11 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 0 additions & 35 deletions .github/workflows/document_deploy.yml

This file was deleted.

9 changes: 0 additions & 9 deletions documents/docs/about/CODE_OF_CONDUCT.md

This file was deleted.

1 change: 1 addition & 0 deletions documents/docs/configurations/developer_configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ The customization configuration parameters are used for customizations in the UF

| Configuration Option | Description | Type | Default Value |
|------------------------|----------------------------------------------|---------|---------------------------------------|
| `ASK_QUESTION` | Whether to allow the agent to ask the user questions. | Boolean | True |
| `USE_CUSTOMIZATION` | Whether to enable the customization. | Boolean | True |
| `QA_PAIR_FILE` | The path for the historical QA pairs. | String | "customization/historical_qa.txt" |
| `QA_PAIR_NUM` | The number of QA pairs for the customization.| Integer | 20 |
Expand Down
4 changes: 2 additions & 2 deletions documents/docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ UFO sightings have garnered attention from various media outlets, including:
* For other communications, please contact [ufo-agent@microsoft.com](mailto:ufo-agent@microsoft.com)
---

## 🎬 Demo Examples
<!-- ## 🎬 Demo Examples

We present two demo videos in which UFO completes user requests on Windows OS. For more case studies, please consult our [technical report](https://arxiv.org/abs/2402.07939).

Expand All @@ -82,7 +82,7 @@ In this example, we will demonstrate how to efficiently use UFO to delete all no
#### 2️⃣📧 Example 2: Composing an email using text from multiple sources.
In this example, we will demonstrate how to utilize UFO to extract text from Word documents, describe an image, compose an email, and send it seamlessly. Enjoy the versatility and efficiency of cross-application experiences with UFO!

<iframe width="560" height="315" src="https://github.com/microsoft/UFO/assets/11352048/aa41ad47-fae7-4334-8e0b-ba71c4fc32e0" frameborder="0" allowfullscreen></iframe>
<iframe width="560" height="315" src="https://github.com/microsoft/UFO/assets/11352048/aa41ad47-fae7-4334-8e0b-ba71c4fc32e0" frameborder="0" allowfullscreen></iframe> -->

&nbsp;
## 📚 Citation
Expand Down
35 changes: 22 additions & 13 deletions ufo/agents/agent/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,27 +236,36 @@ def process_resume(self) -> None:
if self.processor:
self.processor.resume()

def process_asker(self) -> None:
def process_asker(self, ask_user: bool = True) -> None:
"""
Ask for the process.
:param ask_user: Whether to ask the user for the questions.
"""
if self.processor:
question_list = self.processor.question_list

utils.print_with_color(
"Could you please answer the following questions to help me understand your needs and complete the task?",
"yellow",
)
if ask_user:
utils.print_with_color(
"Could you please answer the following questions to help me understand your needs and complete the task?",
"yellow",
)

for index, question in enumerate(question_list):
answer = question_asker(question, index + 1)
if not answer.strip():
continue
qa_pair = {"question": question, "answer": answer}

utils.append_string_to_file(
configs["QA_PAIR_FILE"], json.dumps(qa_pair)
)
if ask_user:
answer = question_asker(question, index + 1)
if not answer.strip():
continue
qa_pair = {"question": question, "answer": answer}

utils.append_string_to_file(
configs["QA_PAIR_FILE"], json.dumps(qa_pair)
)

else:
qa_pair = {
"question": question,
"answer": "The answer for the question is not available, please proceed with your own knowledge or experience, or leave it as a placeholder. Do not ask the same question again.",
}

self.blackboard.add_questions(qa_pair)

Expand Down
39 changes: 21 additions & 18 deletions ufo/agents/processors/app_agent_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,34 +290,37 @@ def execute_action(self) -> None:
Execute the action.
"""

control_selected = self._annotation_dict.get(self._control_label, "")
control_selected = self._annotation_dict.get(self._control_label, None)

try:
# Get the selected control item from the annotation dictionary and LLM response.
# The LLM response is a number index corresponding to the key in the annotation dictionary.

if control_selected:
if self._operation:

if configs.get("SHOW_VISUAL_OUTLINE_ON_SCREEN", True):
control_selected.draw_outline(colour="red", thickness=3)
time.sleep(configs.get("RECTANGLE_TIME", 0))

control_coordinates = PhotographerDecorator.coordinate_adjusted(
self.application_window.rectangle(), control_selected.rectangle()
)

self._control_log = {
"control_class": control_selected.element_info.class_name,
"control_type": control_selected.element_info.control_type,
"control_automation_id": control_selected.element_info.automation_id,
"control_friendly_class_name": control_selected.friendly_class_name(),
"control_coordinates": {
"left": control_coordinates[0],
"top": control_coordinates[1],
"right": control_coordinates[2],
"bottom": control_coordinates[3],
},
}
if control_selected:
control_coordinates = PhotographerDecorator.coordinate_adjusted(
self.application_window.rectangle(),
control_selected.rectangle(),
)
self._control_log = {
"control_class": control_selected.element_info.class_name,
"control_type": control_selected.element_info.control_type,
"control_automation_id": control_selected.element_info.automation_id,
"control_friendly_class_name": control_selected.friendly_class_name(),
"control_coordinates": {
"left": control_coordinates[0],
"top": control_coordinates[1],
"right": control_coordinates[2],
"bottom": control_coordinates[3],
},
}
else:
self._control_log = {}

self.app_agent.Puppeteer.receiver_manager.create_ui_control_receiver(
control_selected, self.application_window
Expand Down
5 changes: 1 addition & 4 deletions ufo/agents/processors/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def process(self) -> None:
if self.is_error() or self.is_paused():
# If the session is pending, update the step and memory, and return.
if self.is_pending():
self.update_step()
self.update_status()
self.update_memory()

return
Expand Down Expand Up @@ -148,9 +148,6 @@ def resume(self) -> None:
# Step 3: Update the status.
self.update_status()

# Step 4: Update the step.
self.update_step()

self._is_resumed = False

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion ufo/agents/states/app_agent_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def handle(self, agent: "AppAgent", context: Optional["Context"] = None) -> None
"""

# Ask the user questions to help the agent to proceed.
agent.process_asker()
agent.process_asker(ask_user=configs.get("ASK_QUESTION", False))

def next_state(self, agent: AppAgent) -> AppAgentState:
"""
Expand Down
2 changes: 1 addition & 1 deletion ufo/agents/states/host_agent_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def handle(self, agent: "HostAgent", context: Optional["Context"] = None) -> Non
"""

# Ask the user questions to help the agent to proceed.
agent.process_asker()
agent.process_asker(ask_user=configs.get("ASK_QUESTION", False))

def is_round_end(self) -> bool:
"""
Expand Down
5 changes: 5 additions & 0 deletions ufo/automator/puppeteer.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def execute_command(
"""

command = self.create_command(command_name, params, *args, **kwargs)

return command.execute()

def execute_all_commands(self) -> List[Any]:
Expand Down Expand Up @@ -189,6 +190,10 @@ def create_ui_control_receiver(
:param control: The control element.
:return: The UI controller receiver.
"""

if not control or not application:
return None

factory: ReceiverFactory = self.receiver_factory_registry.get("UIControl").get(
"factory"
)
Expand Down
6 changes: 6 additions & 0 deletions ufo/automator/ui_control/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ def click_on_coordinates(self, params: Dict[str, str]) -> str:
# Get the absolute coordinates of the application window.
tranformed_x, tranformed_y = self.transform_point(x, y)

self.application.set_focus()

pyautogui.click(
tranformed_x, tranformed_y, button=button, clicks=2 if double else 1
)
Expand All @@ -129,6 +131,8 @@ def drag_on_coordinates(self, params: Dict[str, str]) -> str:

button = params.get("button", "left")

self.application.set_focus()

pyautogui.moveTo(start[0], start[1])
pyautogui.dragTo(end[0], end[1], button=button)

Expand Down Expand Up @@ -327,6 +331,7 @@ def __init__(self, receiver: ControlReceiver, params=None) -> None:
Initialize the command.
:param receiver: The receiver of the command.
"""

self.receiver = receiver
self.params = params if params is not None else {}

Expand Down Expand Up @@ -437,6 +442,7 @@ def execute(self) -> str:
Execute the drag on coordinates command.
:return: The result of the drag on coordinates command.
"""

return self.receiver.drag_on_coordinates(self.params)

@classmethod
Expand Down
16 changes: 13 additions & 3 deletions ufo/automator/ui_control/openfile.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import time
from typing import Dict

import psutil
from pywinauto import Desktop
Expand Down Expand Up @@ -33,17 +34,20 @@ def __init__(self):
"notepad",
"msteams:",
"ms-todo:",
"calc",
"ms-clock:",
"mspaint",
]
self.app_map = AppMappings()

def execute_code(self, args: dict) -> bool:
def execute_code(self, args: Dict) -> bool:
"""
Execute the code to open some files.
:param args: The arguments of the code, which should at least contains name of APP and the file path we want to open
(ps. filepath can be empty.)
:return: The result of the execution or error.
"""
self.APP = args["APP"]
self.APP = args.get("APP", "")
self.file_path = args.get("file_path", "")
self.check_open_status()
if self.openstatus:
Expand Down Expand Up @@ -114,7 +118,7 @@ def is_file_open_in_app(self) -> bool:
return True
return False

def open_third_party_APP(self, args: dict) -> bool:
def open_third_party_APP(self, args: Dict) -> bool:
# TODO: open third party app
pass

Expand Down Expand Up @@ -158,6 +162,9 @@ class AppMappings:
"firefox": "Firefox",
"excel": "Excel",
"ms-settings:": "Settings",
"calc": "Calculator",
"ms-clock:": "Clock",
"mspaint": "Paint",
}

app_process_map = {
Expand All @@ -173,6 +180,9 @@ class AppMappings:
"firefox": ["firefox.exe", "firefox"],
"excel": ["EXCEL.EXE", "excel"],
"ms-settings:": ["SystemSettings.exe", "ms-settings"],
"ms-clock": ["Time.exe", "ms-clock"],
"calc": ["CalculatorApp.exe", "calc"],
"mspaint": ["mspaint.exe", "mspaint"],
}

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion ufo/automator/ui_control/screenshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def capture(self, save_path: str = None):
:return: The screenshot."""
# Capture single window screenshot
screenshot = self.control.capture_as_image()
if save_path is not None:
if save_path is not None and screenshot is not None:
screenshot.save(save_path, compress_level=DEFAULT_PNG_COMPRESS_LEVEL)
return screenshot

Expand Down
3 changes: 2 additions & 1 deletion ufo/config/config_dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,13 @@ CONTROL_FILTER_TOP_K_ICON: 15 # The control filter top k for icon similarity
CONTROL_FILTER_MODEL_SEMANTIC_NAME: "all-MiniLM-L6-v2" # The control filter model name of semantic similarity
CONTROL_FILTER_MODEL_ICON_NAME: "clip-ViT-B-32" # The control filter model name of icon similarity

ALLOW_OPENAPP: FALSE # Whether to allow the open app action
ALLOW_OPENAPP: True # Whether to allow the open app action
LOG_XML: False # Whether to log the XML file at every step.
SCREENSHOT_TO_MEMORY: True # Whether to allow the screenshot to memory for the agent's decision making.


# For customizations
ASK_QUESTION: True # Whether to allow the agent to ask questions
USE_CUSTOMIZATION: True # Whether to use the customization
QA_PAIR_FILE: "customization/historical_qa.txt" # The path for the historical QA
QA_PAIR_NUM: 20 # The number of QA pairs for the customization
Expand Down
Loading