 5544d71bc8
			
		
	
	5544d71bc8
	
	
	
		
			
			Some things that popped out while reading the comments/documentation. Change-Id: I0ccecae3381447ede44bb855d91f997349be1562
		
			
				
	
	
		
			116 lines
		
	
	
		
			3.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			116 lines
		
	
	
		
			3.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # -*- coding: utf-8 -*-
 | |
| 
 | |
| #    Copyright (C) 2014 Yahoo! Inc. All Rights Reserved.
 | |
| #
 | |
| #    Licensed under the Apache License, Version 2.0 (the "License"); you may
 | |
| #    not use this file except in compliance with the License. You may obtain
 | |
| #    a copy of the License at
 | |
| #
 | |
| #         http://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| #    Unless required by applicable law or agreed to in writing, software
 | |
| #    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 | |
| #    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 | |
| #    License for the specific language governing permissions and limitations
 | |
| #    under the License.
 | |
| 
 | |
| import logging
 | |
| import os
 | |
| import sys
 | |
| 
 | |
| logging.basicConfig(level=logging.ERROR)
 | |
| 
 | |
| self_dir = os.path.abspath(os.path.dirname(__file__))
 | |
| top_dir = os.path.abspath(os.path.join(os.path.dirname(__file__),
 | |
|                                        os.pardir,
 | |
|                                        os.pardir))
 | |
| sys.path.insert(0, top_dir)
 | |
| sys.path.insert(0, self_dir)
 | |
| 
 | |
| # INTRO: These examples show a simplistic map/reduce implementation where
 | |
| # a set of mapper(s) will sum a series of input numbers (in parallel) and
 | |
| # return their individual summed result. A reducer will then use those
 | |
| # produced values and perform a final summation and this result will then be
 | |
| # printed (and verified to ensure the calculation was as expected).
 | |
| 
 | |
| import six
 | |
| 
 | |
| from taskflow import engines
 | |
| from taskflow.patterns import linear_flow
 | |
| from taskflow.patterns import unordered_flow
 | |
| from taskflow import task
 | |
| 
 | |
| 
 | |
| class SumMapper(task.Task):
 | |
|     def execute(self, inputs):
 | |
|         # Sums some set of provided inputs.
 | |
|         return sum(inputs)
 | |
| 
 | |
| 
 | |
| class TotalReducer(task.Task):
 | |
|     def execute(self, *args, **kwargs):
 | |
|         # Reduces all mapped summed outputs into a single value.
 | |
|         total = 0
 | |
|         for (k, v) in six.iteritems(kwargs):
 | |
|             # If any other kwargs was passed in, we don't want to use those
 | |
|             # in the calculation of the total...
 | |
|             if k.startswith('reduction_'):
 | |
|                 total += v
 | |
|         return total
 | |
| 
 | |
| 
 | |
| def chunk_iter(chunk_size, upperbound):
 | |
|     """Yields back chunk size pieces from zero to upperbound - 1."""
 | |
|     chunk = []
 | |
|     for i in range(0, upperbound):
 | |
|         chunk.append(i)
 | |
|         if len(chunk) == chunk_size:
 | |
|             yield chunk
 | |
|             chunk = []
 | |
| 
 | |
| 
 | |
| # Upper bound of numbers to sum for example purposes...
 | |
| UPPER_BOUND = 10000
 | |
| 
 | |
| # How many mappers we want to have.
 | |
| SPLIT = 10
 | |
| 
 | |
| # How big of a chunk we want to give each mapper.
 | |
| CHUNK_SIZE = UPPER_BOUND // SPLIT
 | |
| 
 | |
| # This will be the workflow we will compose and run.
 | |
| w = linear_flow.Flow("root")
 | |
| 
 | |
| # The mappers will run in parallel.
 | |
| store = {}
 | |
| provided = []
 | |
| mappers = unordered_flow.Flow('map')
 | |
| for i, chunk in enumerate(chunk_iter(CHUNK_SIZE, UPPER_BOUND)):
 | |
|     mapper_name = 'mapper_%s' % i
 | |
|     # Give that mapper some information to compute.
 | |
|     store[mapper_name] = chunk
 | |
|     # The reducer uses all of the outputs of the mappers, so it needs
 | |
|     # to be recorded that it needs access to them (under a specific name).
 | |
|     provided.append("reduction_%s" % i)
 | |
|     mappers.add(SumMapper(name=mapper_name,
 | |
|                           rebind={'inputs': mapper_name},
 | |
|                           provides=provided[-1]))
 | |
| w.add(mappers)
 | |
| 
 | |
| # The reducer will run last (after all the mappers).
 | |
| w.add(TotalReducer('reducer', requires=provided))
 | |
| 
 | |
| # Now go!
 | |
| e = engines.load(w, engine='parallel', store=store, max_workers=4)
 | |
| print("Running a parallel engine with options: %s" % e.options)
 | |
| e.run()
 | |
| 
 | |
| # Now get the result the reducer created.
 | |
| total = e.storage.get('reducer')
 | |
| print("Calculated result = %s" % total)
 | |
| 
 | |
| # Calculate it manually to verify that it worked...
 | |
| calc_total = sum(range(0, UPPER_BOUND))
 | |
| if calc_total != total:
 | |
|     sys.exit(1)
 |