# A reducer for the word-count task that uses a fixed amount of memory.
# (This isn't quite true as ints in python can be arbitrarily large, but
# it is doubtful this program would ever use more than a few MBs of memory :)
#
# Each input line is "<word><whitespace><count>". Counts of *consecutive*
# lines carrying the same word are summed, so the input is expected to
# arrive grouped by word (e.g. sorted) -- a word that reappears after a
# different word starts a new output record.

import sys


def reduce_counts(lines):
    """Yield ``(word, total)`` for each run of consecutive equal words.

    Args:
        lines: An iterable of text lines. Each non-blank line must hold a
            word followed by an integer count, separated by whitespace.
            Any fields after the second are ignored.

    Yields:
        ``(word, total)`` tuples in input order, where ``total`` is the sum
        of the counts of one uninterrupted run of ``word``.

    Raises:
        IndexError: If a non-blank line has only one field.
        ValueError: If the count field is not an integer.
    """
    # Only the current word and its running total are kept, which is what
    # keeps memory use constant regardless of input size.
    last_word = None
    word_count = 0

    for line in lines:
        parts = line.split()
        if not parts:
            # Blank / whitespace-only lines (e.g. a trailing newline) carry
            # no record; skip them instead of failing on parts[0].
            continue
        word, c = parts[0], int(parts[1])

        # We're on to a new word now, and it's not the first word.
        if word != last_word and last_word is not None:
            yield last_word, word_count
            word_count = 0

        last_word = word
        word_count += c

    # The final run is never flushed inside the loop. Emit it here, but only
    # if at least one record was seen, so empty input produces no output
    # rather than a bogus "None\t0" record.
    if last_word is not None:
        yield last_word, word_count


def main():
    """Read records from stdin and write ``word<TAB>total`` lines to stdout."""
    # sys.stdout.write is used instead of print so the script runs unchanged
    # on both Python 2 and Python 3.
    for word, total in reduce_counts(sys.stdin):
        sys.stdout.write("%s\t%s\n" % (word, total))


if __name__ == "__main__":
    main()