Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions browsergym/core/src/browsergym/core/action/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,64 @@ def mouse_dblclick(x: float, y: float, button: Literal["left", "middle", "right"
page.mouse.dblclick(x, y, button=button)


def mouse_select_option(x: float, y: float, options: str | list[str]):
    """
    Select one or multiple options in a native <select> element located at the
    given coordinates. Use this instead of mouse_click for native
    select/combobox dropdowns: Playwright's synthetic mouse events cannot
    interact with the browser-rendered native option popup.

    Note: closed shadow roots and cross-origin iframes are not reachable from
    JavaScript and will not be descended into.

    Examples:
        mouse_select_option(270, 167, "blue")
        mouse_select_option(270, 167, ["red", "green"])
    """
    x, y = map_coordinates(page, x, y)  # map coordinates to page coordinates
    if demo_mode != "off":
        smooth_move_visual_cursor_to(page, x, y)
        highlight_by_box(page, {"x": x, "y": y, "width": 1, "height": 1})
    # Descend through open shadow roots and same-origin iframes so the lookup
    # works for selects nested inside either.
    #
    # Coordinates inside an iframe are relative to the iframe's own viewport,
    # which starts at the iframe's *content box*. getBoundingClientRect()
    # reports the border box, so in addition to the rect origin we subtract the
    # border width (clientLeft / clientTop) and the computed padding; otherwise
    # the nested lookup is shifted by the frame's border/padding.
    #
    # `iframe.contentDocument` returns null (does not throw) on cross-origin
    # frames, so we just stop descent there.
    #
    # The walker returns the closest <select> ancestor-or-self of the innermost
    # element under the point, or null/undefined when there is none, in which
    # case `as_element()` yields None.
    elem = page.evaluate_handle(
        """([startX, startY]) => {
            let cx = startX, cy = startY;
            let el = document.elementFromPoint(cx, cy);
            let prev = null;
            // Loop until a pass makes no further descent (el === prev).
            while (el && el !== prev) {
                prev = el;
                if (el.shadowRoot) {
                    const next = el.shadowRoot.elementFromPoint(cx, cy);
                    if (next && next !== el) { el = next; continue; }
                }
                const tag = el.tagName;
                if (tag === 'IFRAME' || tag === 'FRAME') {
                    const doc = el.contentDocument;
                    if (doc) {
                        const r = el.getBoundingClientRect();
                        const view = el.ownerDocument.defaultView;
                        const cs = view ? view.getComputedStyle(el) : null;
                        const padLeft = cs ? (parseFloat(cs.paddingLeft) || 0) : 0;
                        const padTop = cs ? (parseFloat(cs.paddingTop) || 0) : 0;
                        const ix = cx - r.left - el.clientLeft - padLeft;
                        const iy = cy - r.top - el.clientTop - padTop;
                        const next = doc.elementFromPoint(ix, iy);
                        if (next) { el = next; cx = ix; cy = iy; continue; }
                    }
                }
            }
            return el?.closest('select');
        }""",
        [x, y],
    ).as_element()
    if elem is None:
        raise ValueError(f"No <select> element found at coordinates ({x}, {y})")

    def do(force: bool):
        # short timeout so a failed attempt returns quickly to call_fun
        elem.select_option(options, force=force, timeout=500)

    call_fun(do, retry_with_force)


def mouse_drag_and_drop(from_x: float, from_y: float, to_x: float, to_y: float):
"""
Drag and drop from a location to a location. Uses absolute client
Expand Down
2 changes: 2 additions & 0 deletions browsergym/core/src/browsergym/core/action/highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
mouse_down,
mouse_drag_and_drop,
mouse_move,
mouse_select_option,
mouse_up,
mouse_upload_file,
new_tab,
Expand Down Expand Up @@ -69,6 +70,7 @@
mouse_click,
mouse_dblclick,
mouse_drag_and_drop,
mouse_select_option,
mouse_upload_file,
keyboard_down,
keyboard_up,
Expand Down
12 changes: 12 additions & 0 deletions tests/core/data/input_type/select_in_iframe.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html>
<body>
<!--
Test fixture: a native <select id="color"> (options red / green / blue)
rendered inside an iframe whose document is supplied inline via srcdoc.
The iframe has a 1px border, so the nested document's viewport is offset
from the iframe's bounding box.
-->
<h2>Outer page</h2>
<iframe
width="400"
height="200"
style="border:1px solid #888"
srcdoc='<!DOCTYPE html><html><body><label for="color">Pick a color:</label><select id="color" name="color"><option value="red">Red</option><option value="green">Green</option><option value="blue">Blue</option></select></body></html>'
></iframe>
</body>
</html>
18 changes: 18 additions & 0 deletions tests/core/data/input_type/select_in_shadow_dom.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<!DOCTYPE html>
<html>
<body>
<!--
Test fixture: a native <select id="color"> (options red / green / blue)
that lives inside an open shadow root attached to #host by the script below.
-->
<h2>Outer page</h2>
<div id="host"></div>
<script>
// mode "open" keeps the shadow root exposed through host.shadowRoot.
const root = document.getElementById("host").attachShadow({ mode: "open" });
root.innerHTML = `
<label for="color">Pick a color:</label>
<select id="color" name="color">
<option value="red">Red</option>
<option value="green">Green</option>
<option value="blue">Blue</option>
</select>
`;
</script>
</body>
</html>
16 changes: 16 additions & 0 deletions tests/core/data/input_type/select_input.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<!DOCTYPE html>
<html>
<body>
<!--
Test fixture: a plain top-level form containing a single native
<select id="color"> whose option values are red / green / blue and whose
visible labels are Red / Green / Blue.
-->

<h2>Select</h2>
<form>
<label for="color">Pick a color:</label>
<select id="color" name="color">
<option value="red">Red</option>
<option value="green">Green</option>
<option value="blue">Blue</option>
</select>
</form>

</body>
</html>
127 changes: 127 additions & 0 deletions tests/core/test_actions_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import bs4
import gymnasium as gym
import playwright.sync_api
import pytest
from pyparsing.exceptions import ParseException

Expand Down Expand Up @@ -33,6 +34,9 @@
TEXT_INPUT_URL = f"file://{__DATA_DIR}/input_type/text_input.html"
URL_INPUT_URL = f"file://{__DATA_DIR}/input_type/url_input.html"
CHECKBOX_URL = f"file://{__DATA_DIR}/input_type/checkbox_input.html"
SELECT_URL = f"file://{__DATA_DIR}/input_type/select_input.html"
SELECT_IN_IFRAME_URL = f"file://{__DATA_DIR}/input_type/select_in_iframe.html"
SELECT_IN_SHADOW_DOM_URL = f"file://{__DATA_DIR}/input_type/select_in_shadow_dom.html"
MULTI_IFRAME_URL = f"file://{__DATA_DIR}/basic_iframe_site/basic_iframe_2.html"
OBSTRUCTED_CHECKBOX_URL = f"file://{__DATA_DIR}/obstructed_checkbox_page.html"
LOTS_OF_IFRAMES_URL = f"file://{__DATA_DIR}/lots_of_iframes.html"
Expand Down Expand Up @@ -1271,6 +1275,129 @@ def get_checkbox_elem(obs):
assert not checkbox.has_attr("checked")


def test_mouse_select_option():
    """mouse_select_option selects by label and by value, and errors off-target."""
    action_set = HighLevelActionSet(subsets=["coord"])

    env = gym.make(
        "browsergym/openended",
        task_kwargs={"start_url": SELECT_URL},
        headless=__HEADLESS,
        slow_mo=__SLOW_MO,
        timeout=__TIMEOUT,
        action_mapping=action_set.to_python_code,
    )

    try:
        obs, info = env.reset()
        assert not obs["last_action_error"]

        # query the live page for the select bbox so the test does not depend on
        # the device pixel ratio of the host (DOM-observation coords can be in
        # device pixels on retina displays, which would silently miss-click).
        page = env.unwrapped.page
        box = page.locator("select#color").bounding_box()
        x, y = box["x"] + box["width"] / 2, box["y"] + box["height"] / 2

        def selected_value():
            return page.evaluate("() => document.getElementById('color').value")

        # select by visible label
        obs, reward, term, trunc, info = env.step(
            f"mouse_select_option({x!r}, {y!r}, 'Blue')"
        )
        assert not obs["last_action_error"]
        assert selected_value() == "blue"

        # select by value
        obs, reward, term, trunc, info = env.step(
            f"mouse_select_option({x!r}, {y!r}, 'red')"
        )
        assert not obs["last_action_error"]
        assert selected_value() == "red"

        # coordinates not over a <select> should raise
        obs, reward, term, trunc, info = env.step("mouse_select_option(0, 0, 'red')")
        assert "ValueError" in obs["last_action_error"]
    finally:
        # close the environment even when an assertion above fails, so a
        # failing test does not leak the browser
        env.close()


@pytest.mark.parametrize(
    "start_url",
    [SELECT_IN_IFRAME_URL, SELECT_IN_SHADOW_DOM_URL],
    ids=["iframe", "shadow-dom"],
)
def test_mouse_select_option_nested(start_url):
    """mouse_select_option reaches a <select> nested in an iframe or an open shadow root."""
    action_set = HighLevelActionSet(subsets=["coord"])

    env = gym.make(
        "browsergym/openended",
        task_kwargs={"start_url": start_url},
        headless=__HEADLESS,
        slow_mo=__SLOW_MO,
        timeout=__TIMEOUT,
        action_mapping=action_set.to_python_code,
    )

    try:
        obs, info = env.reset()
        assert not obs["last_action_error"]

        page = env.unwrapped.page
        # ensure nested document(s) have finished loading; only swallow the
        # narrow Playwright timeout/navigation error class.
        for frame in page.frames:
            try:
                frame.wait_for_load_state("domcontentloaded")
            except playwright.sync_api.Error:
                pass

        # The select lives inside either a same-origin iframe or an open shadow
        # root. Resolve its viewport-relative bbox and current value via a JS
        # walker that descends into both.
        def select_bbox_and_value():
            return page.evaluate(
                """() => {
                    // Returns {el, dx, dy}: the first <select> found, plus the
                    // offset of its document's viewport relative to the
                    // top-level viewport. Always the same shape, so nested
                    // iframes compose correctly.
                    function findSelect(root, dx, dy) {
                        const direct = root.querySelector('select');
                        if (direct) return { el: direct, dx, dy };
                        for (const el of root.querySelectorAll('*')) {
                            if (el.shadowRoot) {
                                // shadow content shares the host document's viewport
                                const hit = findSelect(el.shadowRoot, dx, dy);
                                if (hit) return hit;
                            }
                            if ((el.tagName === 'IFRAME' || el.tagName === 'FRAME') && el.contentDocument) {
                                // the frame's viewport starts inside its border
                                const r = el.getBoundingClientRect();
                                const hit = findSelect(
                                    el.contentDocument,
                                    dx + r.left + el.clientLeft,
                                    dy + r.top + el.clientTop,
                                );
                                if (hit) return hit;
                            }
                        }
                        return null;
                    }
                    const hit = findSelect(document, 0, 0);
                    if (!hit) return null;
                    const r = hit.el.getBoundingClientRect();
                    return {
                        rect: { x: hit.dx + r.left, y: hit.dy + r.top, w: r.width, h: r.height },
                        value: hit.el.value,
                    };
                }"""
            )

        info_before = select_bbox_and_value()
        assert info_before is not None
        r = info_before["rect"]
        x, y = r["x"] + r["w"] / 2, r["y"] + r["h"] / 2

        obs, reward, term, trunc, info = env.step(
            f"mouse_select_option({x!r}, {y!r}, 'Blue')"
        )
        assert not obs["last_action_error"]
        assert select_bbox_and_value()["value"] == "blue"
    finally:
        # close the environment even when an assertion above fails, so a
        # failing test does not leak the browser
        env.close()


# test that forced action can click an obstructed element
@pytest.mark.parametrize("retry_with_force", [True, False])
def test_forced_actions(retry_with_force):
Expand Down