Skip to content

Commit abcfe76

Browse files
committed
Fix handling of AI extract rules
1 parent f3a7669 commit abcfe76

File tree

5 files changed

+32
-1
lines changed

5 files changed

+32
-1
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ Signup to ScrapingBee to [get your API key](https://app.scrapingbee.com/account/
4040
'device': 'desktop',
4141
# Use some data extraction rules
4242
'extract_rules': {'title': 'h1'},
43+
# Use AI to extract data from the page
44+
'ai_extract_rules': {'product_name': 'The name of the product', 'price': 'The price in USD'},
4345
# Wrap response in JSON
4446
'json_response': False,
4547
# Interact with the webpage you want to scrape

scrapingbee/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "2.0.1"
1+
__version__ = "2.0.2"

scrapingbee/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ def process_params(params: dict) -> dict:
4646
new_params[k] = process_cookies(v)
4747
elif k == 'extract_rules':
4848
new_params[k] = process_json_stringify_param(v, 'extract_rules')
49+
elif k == 'ai_extract_rules':
50+
new_params[k] = process_json_stringify_param(v, 'ai_extract_rules')
4951
elif k == 'js_scenario':
5052
new_params[k] = process_json_stringify_param(v, 'js_scenario')
5153
else:

tests/test_client.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,27 @@ def test_get_with_js_scenario(mock_session, client):
113113
)
114114

115115

116+
@mock.patch('scrapingbee.client.Session')
117+
def test_get_with_ai_extract_rules(mock_session, client):
118+
'''It should format the ai_extract_rules and add them to the url'''
119+
client.get('https://httpbin.org', params={
120+
'ai_extract_rules': {
121+
"product_name": "The name of the product",
122+
"price": "The price in USD"
123+
}
124+
})
125+
126+
mock_session.return_value.request.assert_called_with(
127+
'GET',
128+
'https://app.scrapingbee.com/api/v1/'
129+
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&'
130+
'ai_extract_rules=%7B%22product_name%22%3A+%22The+name+of+the+product%22%2C+%22'
131+
'price%22%3A+%22The+price+in+USD%22%7D',
132+
data=None,
133+
headers=DEFAULT_HEADERS,
134+
)
135+
136+
116137
@mock.patch('scrapingbee.client.Session')
117138
def test_post(mock_session, client):
118139
'''It should make a POST request with some data'''

tests/test_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ def test_process_js_scenario():
4646
assert output == '{"instructions": [{"click": "#buttonId"}]}'
4747

4848

49+
def test_process_ai_extract_rules():
50+
"""It should format ai_extract_rules to a stringified JSON"""
51+
output = process_json_stringify_param({"product_name": "The name of the product", "price": "The price in USD"}, "ai_extract_rules")
52+
assert output == '{"product_name": "The name of the product", "price": "The price in USD"}'
53+
54+
4955
def test_process_params():
5056
"""It should keep boolean parameters"""
5157
output = process_params({"render_js": True})

0 commit comments

Comments
 (0)