A good approach in such cases is not pushing SQL to such an extent that it becomes confusing, difficult to understand and maintain. Let SQL do what is convenient and can handle query results in Python.
Here is a cut out version of a simple crosstab generator that I wrote. The full version delivers totals of rows / columns / greats.
You will notice that it has a built-in βgroup byβ - the original use case was to summarize the data received from Excel files using Python and xlrd.
row_key and col_key that you supply do not have to be strings, as in the example; they can be tuples - for example, (year, week) in your case - or they can be integers. you have a string column name mapping with an integer sort key.
import sys class CrossTab(object): def __init__( self, missing=0, # what to return for an empty cell. Alternatives: '', 0.0, None, 'NULL' ): self.missing = missing self.col_key_set = set() self.cell_dict = {} self.headings_OK = False def add_item(self, row_key, col_key, value): self.col_key_set.add(col_key) try: self.cell_dict[row_key][col_key] += value except KeyError: try: self.cell_dict[row_key][col_key] = value except KeyError: self.cell_dict[row_key] = {col_key: value} def _process_headings(self): if self.headings_OK: return self.row_headings = list(sorted(self.cell_dict.iterkeys())) self.col_headings = list(sorted(self.col_key_set)) self.headings_OK = True def get_col_headings(self): self._process_headings() return self.col_headings def generate_row_info(self): self._process_headings() for row_key in self.row_headings: row_dict = self.cell_dict[row_key] row_vals = [row_dict.get(col_key, self.missing) for col_key in self.col_headings] yield row_key, row_vals def dump(self, f=None, header=None, footer='', ): if f is None: f = sys.stdout alist = self.__dict__.items() alist.sort() if header is not None: print >> f, header for attr, value in alist: print >> f, "%s: %r" % (attr, value) if footer is not None: print >> f, footer if __name__ == "__main__": data = [ ['Rob', 'Morn', 240], ['Rob', 'Aft', 300], ['Joe', 'Morn', 70], ['Joe', 'Aft', 80], ['Jill', 'Morn', 100], ['Jill', 'Aft', 150], ['Rob', 'Aft', 40], ['Rob', 'aft', 5], ['Dozy', 'Aft', 1],
Output:
['AM', 'PM', 'XXXX'] ('Dozy', [None, 1, None]) ('Jill', [100, 150, None]) ('Joe', [70, 80, None]) ('Nemo', [None, None, -1]) ('Rob', [240, 345, None])