|
| 1 | +# |
| 2 | +# Licensed to the Apache Software Foundation (ASF) under one or more |
| 3 | +# contributor license agreements. See the NOTICE file distributed with |
| 4 | +# this work for additional information regarding copyright ownership. |
| 5 | +# The ASF licenses this file to You under the Apache License, Version 2.0 |
| 6 | +# (the "License"); you may not use this file except in compliance with |
| 7 | +# the License. You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, software |
| 12 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | +# See the License for the specific language governing permissions and |
| 15 | +# limitations under the License. |
| 16 | +# |
| 17 | + |
| 18 | +"""Unit tests for bundle processing.""" |
| 19 | +# pytype: skip-file |
| 20 | + |
| 21 | +from __future__ import absolute_import |
| 22 | + |
| 23 | +import unittest |
| 24 | + |
| 25 | +from apache_beam.runners.worker.bundle_processor import DataInputOperation |
| 26 | + |
| 27 | + |
def simple_split(first_residual_index):
  """Build the expected result tuple for a split between whole elements.

  Args:
    first_residual_index: index placed in the last slot of the tuple.

  Returns:
    The 4-tuple ``(first_residual_index - 1, None, None,
    first_residual_index)``. The two ``None`` slots occupy the positions
    that ``element_split`` fills with its primary/residual descriptions.
  """
  last_primary_index = first_residual_index - 1
  return (last_primary_index, None, None, first_residual_index)
| 30 | + |
| 31 | + |
def element_split(frac, index):
  """Build the expected result tuple for a split inside one element.

  Args:
    frac: fraction reported in the primary description; the residual
      description reports ``1 - frac``. Both are formatted to one decimal.
    index: index of the element being split.

  Returns:
    The 4-tuple ``(index - 1, 'Primary(<frac>)', 'Residual(<1 - frac>)',
    index + 1)``.
  """
  primary = 'Primary(%0.1f)' % frac
  residual = 'Residual(%0.1f)' % (1 - frac)
  return (index - 1, primary, residual, index + 1)
| 38 | + |
| 39 | + |
class SplitTest(unittest.TestCase):
  """Checks ``DataInputOperation._compute_split`` against expected tuples."""
  def split(
      self,
      index,
      current_element_progress,
      fraction_of_remainder,
      buffer_size,
      allowed=(),
      sdf=False):
    """Invoke ``DataInputOperation._compute_split`` with test defaults.

    Args:
      index: forwarded as the first positional argument.
      current_element_progress: forwarded as the second positional argument.
      fraction_of_remainder: forwarded as the fourth positional argument.
      buffer_size: forwarded as the fifth positional argument.
      allowed: forwarded as ``allowed_split_points``.
      sdf: when true, the ``try_split`` callback returns the primary and
        residual descriptions produced by ``element_split(frac, 0)``;
        otherwise the callback returns ``None``.

    Returns:
      Whatever ``_compute_split`` returns.
    """
    def try_split(frac):
      # Only the two middle entries (primary, residual) are handed back.
      if not sdf:
        return None
      return element_split(frac, 0)[1:3]

    # The third positional argument is always infinity here (presumably the
    # stop index -- confirm against _compute_split's signature).
    return DataInputOperation._compute_split(
        index,
        current_element_progress,
        float('inf'),
        fraction_of_remainder,
        buffer_size,
        allowed_split_points=allowed,
        try_split=try_split)

  def sdf_split(self, *args, **kwargs):
    """Same as ``split`` but with ``sdf=True`` forced."""
    return self.split(*args, sdf=True, **kwargs)

  def test_simple_split(self):
    # Each row: (index, fraction_of_remainder, expected first residual index).
    cases = [
        # Split as close to the beginning as possible.
        (0, 0, 1),
        # The closest split is at 4, even when just above or below it.
        (0, 0.24, 4),
        (0, 0.25, 4),
        (0, 0.26, 4),
        # Split the *remainder* in half.
        (0, 0.5, 8),
        (2, 0.5, 9),
        (6, 0.5, 11),
    ]
    for index, fraction, first_residual in cases:
      self.assertEqual(
          self.split(index, 0, fraction, 16), simple_split(first_residual))

  def test_split_with_element_progress(self):
    # Progress into the active element influences where the split of the
    # remainder falls.
    # Each row: (index, current_element_progress, expected first residual).
    cases = [
        (0, 0.5, 1),
        (0, 0.9, 2),
        (1, 0.0, 2),
        (1, 0.1, 2),
    ]
    for index, progress, first_residual in cases:
      self.assertEqual(
          self.split(index, progress, 0.25, 4), simple_split(first_residual))

  def test_split_with_element_allowed_splits(self):
    # Each row: (allowed split points, expected first residual index).
    cases = [
        # The desired split point is at 4.
        ((2, 3, 4, 5), 4),
        # If we can't split at 4, choose the closest possible split point.
        ((2, 3, 5), 5),
        ((2, 3, 6), 3),
        # Also cover the cases where every allowed split point lies above or
        # below the desired split point.
        ((5, 6, 7), 5),
        ((1, 2, 3), 3),
    ]
    for allowed, first_residual in cases:
      self.assertEqual(
          self.split(0, 0, 0.25, 16, allowed=allowed),
          simple_split(first_residual))

    # We have progressed beyond all possible split points, so can't split.
    self.assertEqual(self.split(5, 0, 0.25, 16, allowed=(1, 2, 3)), None)

  def test_sdf_split(self):
    # Split between future elements at element boundaries.
    # Each row: (fraction_of_remainder, expected first residual index).
    for fraction, first_residual in [(0.51, 2), (0.49, 2), (0.26, 1),
                                     (0.25, 1)]:
      self.assertEqual(
          self.sdf_split(0, 0, fraction, 4), simple_split(first_residual))

    # If the split falls inside the first, splittable element, split there.
    actual = self.sdf_split(0, 0, 0.20, 4)
    self.assertEqual(actual, (-1, 'Primary(0.8)', 'Residual(0.2)', 1))
    # A smaller fraction moves the split point within the first element.
    actual = self.sdf_split(0, 0, .125, 4)
    self.assertEqual(actual, (-1, 'Primary(0.5)', 'Residual(0.5)', 1))
    # Here we are far enough into the first element that splitting at 0.2 of
    # the remainder falls outside the first element.
    self.assertEqual(self.sdf_split(0, .5, 0.2, 4), simple_split(1))

    # Verify the above logic when we are partially through the stream.
    self.assertEqual(self.sdf_split(2, 0, 0.6, 4), simple_split(3))
    self.assertEqual(self.sdf_split(2, 0.9, 0.6, 4), simple_split(4))
    actual = self.sdf_split(2, 0.5, 0.2, 4)
    self.assertEqual(actual, (1, 'Primary(0.6)', 'Residual(0.4)', 3))

  def test_sdf_split_with_allowed_splits(self):
    # This is where we would like to split, when all split points are
    # available.
    actual = self.sdf_split(2, 0, 0.2, 5, allowed=(1, 2, 3, 4, 5))
    self.assertEqual(actual, (1, 'Primary(0.6)', 'Residual(0.4)', 3))

    # Each row: (allowed split points, expected first residual index).
    cases = [
        # We can't split element at index 2, because 3 is not a split point.
        ((1, 2, 4, 5), 4),
        # With 4 also removed from the allowed points, the split moves out
        # to 5.
        ((1, 2, 5), 5),
        # We can't split element at index 2, because 2 is not a split point.
        ((1, 3, 4, 5), 3),
    ]
    for allowed, first_residual in cases:
      self.assertEqual(
          self.sdf_split(2, 0, 0.2, 5, allowed=allowed),
          simple_split(first_residual))
| 140 | + |
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()
0 commit comments