Sort files in a list

Say I have a list of files

files = ['s1.txt', 'ai1.txt', 's2.txt', 'ai3.txt']

and I need to group them into sub-lists based on the number in each name, so that

files = [['s1.txt', 'ai1.txt'], ['s2.txt'], ['ai3.txt']]

I could write a bunch of loops, however I am wondering if there is a better way to do this?

+5
source share
4 answers

Here is a complete working example based on defaultdict:

import re
from collections import defaultdict

# Sample input: file names that each contain a number.
files = ['s1.txt', 'ai1.txt', 's2.txt', 'ai3.txt']

def get_key(fname):
    """Return the first run of digits found in ``fname`` as an ``int``.

    Raises:
        IndexError: if ``fname`` contains no digits.
    """
    digit_runs = re.findall(r'\d+', fname)
    first_run = digit_runs[0]
    return int(first_run)

# Bucket every file name under the number extracted by get_key.
d = defaultdict(list)
for f in files:
    d[get_key(f)].append(f)

# Keys are unique, so sorting the (number, names) pairs orders the groups
# by number without ever comparing the name lists themselves.
out = [names for _number, names in sorted(d.items())]
print(out)

This gives:

[['s1.txt', 'ai1.txt'], ['s2.txt'], ['ai3.txt']]
+6
source
import itertools
import re

# The prefix is lazy (".*?") so the capture group receives the first complete
# run of digits; a greedy ".*" would swallow all but the final digit
# (e.g. "4" instead of "54" for "foo54.txt").
r_number = re.compile(r"^.*?([0-9]+).*$")

def key_for_filename(filename):
    """Return the first run of digits in ``filename`` as a string.

    The key is a string, so sorting by it orders keys lexically
    ("10" sorts before "2").

    Raises:
        AttributeError: if ``filename`` contains no digits, because the
            pattern does not match and ``match`` returns ``None``.
    """
    return r_number.match(filename).group(1)

# Sort first so that names with equal keys are adjacent, then let groupby
# split the sorted sequence into one list per key.
grouped = [
    list(members)
    for _key, members in itertools.groupby(
        sorted(files, key=key_for_filename), key=key_for_filename
    )
]
+4
source

First, define a function that extracts the number from a file name:

def file_number(name):
    """Return the first run of digits in ``name`` as a string.

    Raises:
        AttributeError: if ``name`` contains no digits, because
            ``re.search`` returns ``None``.
    """
    # Search the argument itself rather than a fixed sample name, so each
    # file gets its own key.
    return re.search(r"\d+", name).group(0)

( , , .)

Then sort the list using that function as the key:

# Sort the list in place, using file_number as the sort key.
files.sort(key=file_number)

Finally, group the sorted list with itertools.groupby():

# Iterate over (key, group) pairs: groupby starts a new group each time the
# value returned by file_number changes between consecutive items.
for number, group in itertools.groupby(files, file_number):
    # whatever -- process `group`, an iterator over this run of file names
+1

- .

#!/usr/bin/python

from itertools import groupby
import re
import pprint

def findGroup(record):
    """Return the run of digits directly before the ``.txt`` suffix of ``record``.

    The result is a string, e.g. ``'54'`` for ``'foo54.txt'``.

    Raises:
        AttributeError: if ``record`` does not end in digits followed by
            ``.txt``, because ``re.match`` returns ``None``.
    """
    # Raw string so "\d" reaches the regex engine unchanged; "\." matches
    # only a literal dot instead of any character.
    return re.match(r".*?(\d+)\.txt$", record).group(1)

# Sample input; note that it is not sorted by number.
files = [ 's1.txt', 'ai1.txt', 's2.txt', 'ai3.txt', 'foo1.txt', 'foo54.txt' ]

# groupby only merges consecutive items with equal keys, so a key that
# reappears later in the list (here '1') yields a second sub-list under the
# same dictionary entry.
results = {}
for k, g in groupby(files, findGroup):
    # setdefault works on both Python 2 and 3; dict.has_key() was removed
    # in Python 3.
    results.setdefault(k, []).append(list(g))

pprint.pprint(results)

, , .

Output:

{'1': [['s1.txt', 'ai1.txt'], ['foo1.txt']],
 '2': [['s2.txt']],
 '3': [['ai3.txt']],
 '54': [['foo54.txt']]}
0

All Articles