-
Notifications
You must be signed in to change notification settings - Fork 19
Improvement/revamp cursor iteration #44
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
20a43a1
61dc775
5915e13
ae6993d
18dd233
60b82b7
8c671cb
2deb232
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,7 +23,6 @@ def __init__(self, client, service_name, representation, **specs): | |
| self.service_name = service_name | ||
| self.representation = representation | ||
| self.specs = specs | ||
| self.retrieved = 0 | ||
| self._skip = 0 | ||
| self._limit = float('inf') | ||
|
|
||
|
|
@@ -41,19 +40,7 @@ def __iter__(self): | |
| """Iterate over all AppNexus objects matching the specifications""" | ||
| for page in self.iter_pages(): | ||
| data = self.extract_data(page) | ||
| if self._skip >= len(data): | ||
| self._skip -= len(data) | ||
| continue | ||
| elif self._skip: | ||
| self._skip = 0 | ||
| data = data[self._skip:] | ||
| lasting = self._limit - self.retrieved | ||
| if not lasting: | ||
| break | ||
| elif lasting < len(data): | ||
| data = data[:lasting] | ||
| for entity in data: | ||
| self.retrieved += 1 | ||
| yield entity | ||
|
|
||
| def extract_data(self, page): | ||
|
|
@@ -86,15 +73,17 @@ def get_page(self, start_element=0, num_elements=None): | |
| specs.update(start_element=start_element, num_elements=num_elements) | ||
| return self.client.get(self.service_name, **specs) | ||
|
|
||
| def iter_pages(self, skip_elements=0): | ||
| def iter_pages(self): | ||
| """Iterate as much as needed to get all available pages""" | ||
| start_element = skip_elements | ||
| start_element = self._skip | ||
| num_elements = min(self._limit, self.batch_size) | ||
| count = -1 | ||
| while start_element < count or count == -1: | ||
| page = self.get_page(start_element) | ||
| page = self.get_page(start_element, num_elements) | ||
| yield page | ||
| start_element = page["start_element"] + page["num_elements"] | ||
| count = page["count"] | ||
| start_element = start_element + page["num_elements"] | ||
| num_elements = min(page["count"] - num_elements, self.batch_size) | ||
| count = min(page["count"], self._skip + self._limit) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't we just keep
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't address this comment. |
||
|
|
||
| def count(self): | ||
| """Returns the number of elements matching the specifications""" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,9 @@ | |
| from appnexus.client import AppNexusClient | ||
| from appnexus.cursor import Cursor | ||
|
|
||
| from .helpers import gen_random_collection | ||
| from .helpers import gen_collection | ||
|
|
||
| COLLECTION_SIZE = 324 | ||
|
|
||
|
|
||
| @pytest.fixture | ||
|
|
@@ -55,7 +57,12 @@ def response_dict2(): | |
|
|
||
| @pytest.fixture | ||
| def random_response_dict(): | ||
| return gen_random_collection(count=324) | ||
| return gen_collection(count=COLLECTION_SIZE, randomize=True) | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def ordered_response_dict(): | ||
| return gen_collection(count=COLLECTION_SIZE, randomize=False) | ||
|
|
||
|
|
||
| @pytest.fixture | ||
|
|
@@ -71,7 +78,28 @@ def random_cursor(mocker, random_response_dict): | |
| client = AppNexusClient("test", "test") | ||
| mocker.patch.object(client, "get") | ||
| client.get.side_effect = random_response_dict | ||
| return Cursor(client, "campaign", representations.raw) | ||
| cursor = Cursor(client, "campaign", representations.raw) | ||
| mocker.patch.object(cursor, "get_page", wraps=cursor.get_page) | ||
| return cursor | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def ordered_cursor(mocker, ordered_response_dict): | ||
| client = AppNexusClient("test", "test") | ||
| mocker.patch.object(client, "get") | ||
| client.get.side_effect = ordered_response_dict | ||
| cursor = Cursor(client, "campaign", representations.raw) | ||
| mocker.patch.object(cursor, "get_page", wraps=cursor.get_page) | ||
| return cursor | ||
|
|
||
|
|
||
| def mock_ordered_cursor(mocker, start=0, count=COLLECTION_SIZE, factor=1): | ||
| client = AppNexusClient("test", "test") | ||
| mocker.patch.object(client, "get") | ||
| client.get.side_effect = gen_collection(start, count) * factor | ||
| cursor = Cursor(client, "campaign", representations.raw) | ||
| mocker.patch.object(cursor, "get_page", wraps=cursor.get_page) | ||
| return cursor | ||
|
|
||
|
|
||
| def test_cursor_count(cursor, response_dict): | ||
|
|
@@ -164,3 +192,109 @@ def test_uncallable_representation(): | |
| def test_requests_volume_on_iteration(cursor): | ||
| _ = [r for r in cursor] | ||
| assert cursor.client.get.call_count == 1 | ||
|
|
||
|
|
||
| def test_skip_none(ordered_cursor): | ||
| results = [r for r in ordered_cursor] | ||
| assert len(results) == COLLECTION_SIZE | ||
| assert results[0]['id'] == 0 | ||
| assert results[-1]['id'] == COLLECTION_SIZE - 1 | ||
| assert ordered_cursor.get_page.call_count == 4 | ||
|
|
||
|
|
||
| def test_skip_ten(mocker): | ||
| skip = 10 | ||
| cursor = mock_ordered_cursor(mocker, start=skip) | ||
| cursor.skip(skip) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure to understand why you
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The goal of this test and the ones following is to validate that the cursor is iterating over pages properly taking I think the tests are ok as is (used them to validate the algo changes) though I admit that they could be a bit better. |
||
| results = [r for r in cursor] | ||
| assert len(results) == COLLECTION_SIZE - skip | ||
| assert results[0]['id'] == skip | ||
| assert results[-1]['id'] == COLLECTION_SIZE - 1 | ||
| assert cursor.get_page.call_count == 4 | ||
|
|
||
|
|
||
| def test_skip_hundred_ten(mocker): | ||
| skip = 110 | ||
| cursor = mock_ordered_cursor(mocker, start=skip) | ||
| cursor.skip(skip) | ||
| results = [r for r in cursor] | ||
| assert len(results) == COLLECTION_SIZE - skip | ||
| assert results[0]['id'] == skip | ||
| assert results[-1]['id'] == COLLECTION_SIZE - 1 | ||
| assert cursor.get_page.call_count == 3 | ||
|
|
||
|
|
||
| def test_skip_twice(mocker): | ||
| skip = 10 | ||
| cursor = mock_ordered_cursor(mocker, start=skip, factor=2) | ||
| cursor.skip(skip) | ||
| results = [r for r in cursor] | ||
| assert len(results) == COLLECTION_SIZE - skip | ||
| assert results[0]['id'] == skip | ||
| assert cursor.get_page.call_count == 4 | ||
| results = [r for r in cursor] | ||
| assert len(results) == COLLECTION_SIZE - skip | ||
| assert results[0]['id'] == skip | ||
| assert cursor.get_page.call_count == 8 | ||
|
|
||
|
|
||
| def test_limit_ten(mocker): | ||
| limit = 10 | ||
| cursor = mock_ordered_cursor(mocker, count=limit) | ||
| cursor.limit(limit) | ||
| results = [r for r in cursor] | ||
| assert len(results) == limit | ||
| assert results[0]['id'] == 0 | ||
| assert results[-1]['id'] == limit - 1 | ||
| assert cursor.get_page.call_count == 1 | ||
|
|
||
|
|
||
| def test_limit_hundred_ten(mocker): | ||
| limit = 110 | ||
| cursor = mock_ordered_cursor(mocker, count=limit) | ||
| cursor.limit(limit) | ||
| results = [r for r in cursor] | ||
| assert len(results) == limit | ||
| assert results[0]['id'] == 0 | ||
| assert results[-1]['id'] == limit - 1 | ||
| assert cursor.get_page.call_count == 2 | ||
|
|
||
|
|
||
| def test_limit_thousand(mocker): | ||
| limit = 1000 | ||
| cursor = mock_ordered_cursor(mocker) | ||
| cursor.limit(limit) | ||
| results = [r for r in cursor] | ||
| assert len(results) == COLLECTION_SIZE | ||
| assert results[0]['id'] == 0 | ||
| assert results[-1]['id'] == COLLECTION_SIZE - 1 | ||
| assert cursor.get_page.call_count == 4 | ||
|
|
||
|
|
||
| def test_limit_twice(mocker): | ||
| limit = 50 | ||
| cursor = mock_ordered_cursor(mocker, count=limit, factor=2) | ||
| cursor.limit(limit) | ||
| results = [r for r in cursor] | ||
| assert len(results) == limit | ||
| assert results[0]['id'] == 0 | ||
| assert results[-1]['id'] == limit - 1 | ||
| assert cursor.get_page.call_count == 1 | ||
| results = [r for r in cursor] | ||
| assert len(results) == limit | ||
| assert results[0]['id'] == 0 | ||
| assert results[-1]['id'] == limit - 1 | ||
| assert cursor.get_page.call_count == 2 | ||
|
|
||
|
|
||
| def test_skip_and_limit(mocker): | ||
| skip = 10 | ||
| limit = 150 | ||
| cursor = mock_ordered_cursor(mocker, start=skip, count=skip + limit) | ||
| cursor.skip(skip) | ||
| cursor.limit(limit) | ||
| results = [r for r in cursor] | ||
| assert len(results) == limit | ||
| assert results[0]['id'] == skip | ||
| assert results[-1]['id'] == limit + skip - 1 | ||
| assert cursor.get_page.call_count == 2 | ||
Uh oh!
There was an error while loading. Please reload this page.