Sort versions in Python

I'm trying to get it so that 1.7.0 comes after 1.7.0.rc0, but before 1.8.0, as it should be if you sorted the versions. I thought the whole point of LooseVersion was that it did the sorting and comparison of this kind correctly.

>>> from distutils.version import LooseVersion >>> versions = ["1.7.0", "1.7.0.rc0", "1.8.0"] >>> lv = [LooseVersion(v) for v in versions] >>> sorted(lv, reverse=True) [LooseVersion ('1.8.0'), LooseVersion ('1.7.0.rc0'), LooseVersion ('1.7.0')] 
+8
source share
5 answers

MAJOR EDIT: The old answer was too messy. Here are two neater solutions.

So, currently, I see three ways to achieve the desired ordering, which places release candidates ("rc") before the actual releases.

  • my old, imperative style
  • use "b" instead of "rc" to use StrictVersion , from the same package
  • extend the Version class to add support for custom tags and tag organizing

1. Old, imperative style

 # Old, imperative style: sort with LooseVersion, then move each release after its RCs.
import re

# Matches a trailing release-candidate marker such as "rc0" or ".rc12".
_RC_SUFFIX = re.compile(r'\.?rc\d+$')


def move_rcs_before_releases(sorted_versions):
    """Return a copy of *sorted_versions* with releases placed after their RCs.

    *sorted_versions* is a list of version strings already sorted so that a
    release ("1.7.0") comes directly before its release candidates
    ("1.7.0rc0", "1.7.0.rc1", ...), which is the order LooseVersion produces.

    A release is swapped with the following entry only when that entry is an
    RC of exactly the same version. A plain prefix test is not enough: it
    would wrongly treat "1.70rc1" as a candidate of "1.7", and "1.6.0rc10"
    as a candidate of "1.6.0rc1".
    """
    result = list(sorted_versions)
    for i in range(len(result) - 1):
        current, following = result[i], result[i + 1]
        is_rc_of_current = (
            _RC_SUFFIX.search(following) is not None
            and _RC_SUFFIX.sub('', following) == current
        )
        if is_rc_of_current:
            # Bubble the release one slot to the right, past its candidate.
            result[i], result[i + 1] = following, current
    return result


if __name__ == '__main__':
    # distutils is deprecated (PEP 632) and absent from Python 3.12+, so it
    # is only imported when the demo is actually run.
    from distutils.version import LooseVersion

    versions = ["1.7.0", "1.7.0.rc0", "1.8.0"]
    lv = sorted(LooseVersion(v) for v in versions)
    sorted_rc = move_rcs_before_releases([v.vstring for v in lv])
    print(sorted_rc)

with this i get:

 ['1.7.0.rc0', '1.7.0', '1.8.0'] 

2. Use "b" instead of "rc"

The distutils.version package also has another StrictVersion class that does the job if your 1.7.0.rc0 can be written as 1.7.0a0 or 1.7.0b0 , marking alpha or beta releases.

I.e:

 # Example: sort version strings by parsing each one with StrictVersion.
from distutils.version import StrictVersion

versions = ["1.7.0", "1.7.0b0", "1.11.0"]
# sorted() returns a new list; `versions` itself is left untouched.
sorted(versions, key=StrictVersion)

This gives:

 ['1.7.0b0', '1.7.0', '1.11.0'] 

Translation from one form to another can be done using the re module.

3. Extend the version class

An obvious problem with the previous solution is StrictVersion's lack of flexibility. Even if you change the version_re class attribute to use rc instead of a or b so that it accepts 1.7.1rc0, it still prints it as 1.7.1r0 (as of Python 2.7.3).

We can fix this by implementing our own custom version class. This can be done as follows: with some unit tests to ensure correctness, at least in some cases:

 #!/usr/bin/python
# file: version2.py
"""Version numbers with a configurable separator and pre-release tags.

Written for Python 3.
"""
import re
import unittest

try:
    from distutils.version import Version as _BaseVersion
except ImportError:  # distutils was removed in Python 3.12 (PEP 632)
    _BaseVersion = object


class NumberedVersion(_BaseVersion):
    """
    A more flexible implementation of distutils.version.StrictVersion

    This implementation allows to specify:
    - an arbitrary number of version numbers:
        not only '1.2.3' , but also '1.2.3.4.5'
    - the separator between version numbers:
        '1-2-3' is allowed when '-' is specified as separator
    - an arbitrary ordering of pre-release tags:
        1.1alpha3 < 1.1beta2 < 1.1rc1 < 1.1
        when ["alpha", "beta", "rc"] is specified as pre-release tag list

    Attributes set by parse():
    - version: tuple of ints, e.g. (1, 2, 3)
    - prerelease: None for a final release, else a (tag, number) tuple
    """

    # Names of the regex groups that capture the pre-release tag and number.
    _re_prerel_tag = 'rel_tag'
    _re_prerel_num = 'tag_num'

    # Instances are mutable (parse() can be called again), so they are
    # deliberately unhashable.
    __hash__ = None

    def __init__(self, vstring=None, sep='.', prerel_tags=('a', 'b')):
        """Create a version, parsing *vstring* when one is given.

        sep is the literal separator between numeric components.
        prerel_tags lists the accepted pre-release tags, lowest first.
        """
        super().__init__()
        self.sep = sep
        # Maps each tag to its rank; a higher rank sorts later.
        self.prerel_tags = {tag: rank for rank, tag in enumerate(prerel_tags)}
        self.version_re = self._compile_pattern(sep, self.prerel_tags.keys())
        self.sep_re = re.compile(re.escape(sep))
        # Defaults so that an unparsed instance can still be printed.
        self.version = ()
        self.prerelease = None
        if vstring:
            self.parse(vstring)

    def _compile_pattern(self, sep, prerel_tags):
        """Return the compiled regex accepting versions for this structure."""
        sep = re.escape(sep)
        tags = '|'.join(re.escape(tag) for tag in prerel_tags)
        if tags:
            release_re = r'(?:(?P<{tn}>{tags})(?P<{nn}>\d+))?'.format(
                tags=tags, tn=self._re_prerel_tag, nn=self._re_prerel_num)
        else:
            # No tags configured: the named groups do not exist at all.
            release_re = ''
        return re.compile(r'^(\d+)(?:{sep}(\d+))*{rel}$'.format(
            sep=sep, rel=release_re))

    def parse(self, vstring):
        """Parse *vstring* into self.version and self.prerelease.

        Raises ValueError when *vstring* does not match the expected format.
        """
        m = self.version_re.match(vstring)
        if not m:
            raise ValueError("invalid version number '{}'".format(vstring))

        # groupdict() is used because the named groups are absent from the
        # pattern when no pre-release tags are configured.
        groups = m.groupdict()
        tag = groups.get(self._re_prerel_tag)
        tag_num = groups.get(self._re_prerel_num)

        if tag is not None and tag_num is not None:
            self.prerelease = (tag, int(tag_num))
            # Everything before the tag is the purely numeric part.
            vnum_string = vstring[:m.start(self._re_prerel_tag)]
        else:
            self.prerelease = None
            vnum_string = vstring

        self.version = tuple(map(int, self.sep_re.split(vnum_string)))

    def __repr__(self):
        return "{cls} ('{vstring}', '{sep}', {prerel_tags})".format(
            cls=self.__class__.__name__, vstring=str(self), sep=self.sep,
            prerel_tags=list(self.prerel_tags))

    def __str__(self):
        s = self.sep.join(map(str, self.version))
        if self.prerelease:
            return s + "{}{}".format(*self.prerelease)
        return s

    def _sort_key(self):
        """Return a tuple ordering pre-releases before their final release."""
        if self.prerelease is None:
            return (self.version, (1,))
        tag, number = self.prerelease
        return (self.version, (0, self.prerel_tags[tag], number))

    def _check_comparable(self, other):
        """Raise ValueError unless *other* has the same separator and tags."""
        if self.prerel_tags != other.prerel_tags or self.sep != other.sep:
            raise ValueError("Unable to compare: instances have different"
                             " structures")

    def __eq__(self, other):
        """Equal when separator, tag list, numbers and pre-release all match."""
        if not isinstance(other, NumberedVersion):
            return NotImplemented
        return (self.prerel_tags == other.prerel_tags
                and self.sep == other.sep
                and self._sort_key() == other._sort_key())

    # TODO deal with trailing zeroes: eg "1.2.0" == "1.2"
    # All four ordering methods are spelled out: when the distutils base
    # class is available it already defines them, so a class decorator such
    # as functools.total_ordering would not replace them.
    def __lt__(self, other):
        """
        Fails when the separator is not the same or when the pre-release
        tags are not the same or do not respect the same order.
        """
        if not isinstance(other, NumberedVersion):
            return NotImplemented
        self._check_comparable(other)
        return self._sort_key() < other._sort_key()

    def __le__(self, other):
        if not isinstance(other, NumberedVersion):
            return NotImplemented
        self._check_comparable(other)
        return self._sort_key() <= other._sort_key()

    def __gt__(self, other):
        if not isinstance(other, NumberedVersion):
            return NotImplemented
        self._check_comparable(other)
        return self._sort_key() > other._sort_key()

    def __ge__(self, other):
        if not isinstance(other, NumberedVersion):
            return NotImplemented
        self._check_comparable(other)
        return self._sort_key() >= other._sort_key()


class TestNumberedVersion(unittest.TestCase):

    def setUp(self):
        self.v = NumberedVersion()

    def test_compile_pattern(self):
        p = self.v._compile_pattern('.', ['a', 'b'])
        tests = {'1.2.3': True, '1a0': True, '1': True, '1.2.3.4a5': True,
                 'b': False, '1c0': False, ' 1': False, '': False}
        for test, result in tests.items():
            self.assertEqual(result, p.match(test) is not None,
                             "test: {} result: {}".format(test, result))

    def test_parse(self):
        tests = {"1.2.3.4a5": ((1, 2, 3, 4), ('a', 5))}
        for test, result in tests.items():
            self.v.parse(test)
            self.assertEqual(result, (self.v.version, self.v.prerelease))

    def test_parse_without_tags(self):
        v = NumberedVersion('1.2.3', '.', [])
        self.assertEqual(((1, 2, 3), None), (v.version, v.prerelease))

    def test_str(self):
        tests = (('1.2.3',), ('10-2-42rc12', '-', ['rc']))
        for t in tests:
            self.assertEqual(t[0], str(NumberedVersion(*t)))

    def test_repr(self):
        v = NumberedVersion('1,2,3rc4', ',', ['lol', 'rc'])
        expected = "NumberedVersion ('1,2,3rc4', ',', ['lol', 'rc'])"
        self.assertEqual(expected, repr(v))

    def test_equality(self):
        self.assertEqual(NumberedVersion('1.7.0'), NumberedVersion('1.7.0'))
        self.assertNotEqual(NumberedVersion('1.7.0'),
                            NumberedVersion('1.7.0a1'))

    def test_order(self):
        test = ["1.7.0", "1.7.0rc0", "1.11.0"]
        expected = ['1.7.0rc0', '1.7.0', '1.11.0']
        versions = [NumberedVersion(v, '.', ['rc']) for v in test]
        self.assertEqual(expected, list(map(str, sorted(versions))))


if __name__ == '__main__':
    unittest.main()

Thus, it can be used as follows:

 # Example: order plain version strings using NumberedVersion as the sort key.
import version2

versions = ["1.7.0", "1.7.0rc2", "1.7.0rc1", "1.7.1", "1.11.0"]
# Each string is parsed with '.' as the separator and 'rc' as the only
# pre-release tag; sorted() returns the original strings in version order.
sorted(versions, key=lambda v: version2.NumberedVersion(v, '.', ['rc']))

Output:

 ['1.7.0rc1', '1.7.0rc2', '1.7.0', '1.7.1', '1.11.0'] 

So in conclusion, use the batteries included in python, or deploy your own.

About this implementation: it could be improved by handling trailing zeros in releases, and by memoizing the compilation of the regular expressions.

+7
source
 >>> from distutils.version import LooseVersion >>> versions = ["1.7.0", "1.7.0rc0", "1.11.0"] >>> sorted(versions, key=LooseVersion) ['1.7.0', '1.7.0rc0', '1.11.0'] 

From the documentation:

Version numbering for anarchists and software realists. Implements the standard interface for version number classes as described above. A version number consists of a series of numbers, separated by either periods or strings of letters. When comparing version numbers, the numeric components will be compared numerically, and the alphabetic components lexically.
...
In fact, there is no such thing as an invalid version number under this scheme; the rules for comparison are simple and predictable, but may not always give the results you want (for some definition of "want").

So you can see that there is no special handling for "rc".

You can see how the version number is broken as follows

 >>> LooseVersion('1.7.0rc0').version [1, 7, 0, 'rc', 0] 
+10
source

I am using pkg_resources :

 # Check that pkg_resources.parse_version orders dev/rc/final versions correctly.
import random

from pkg_resources import parse_version


def test_version_sorting():
    """Sorting by parse_version must restore the hand-written order.

    The expected list is given in the intended order. It is scrambled in two
    ways (alphabetically and randomly), and both must sort back to it.
    """
    expected = [
        '1.0.0dev0',
        '1.0.0dev1',
        '1.0.0dev2',
        '1.0.0dev10',
        '1.0.0rc0',
        '1.0.0rc2',
        '1.0.0rc5',
        '1.0.0rc21',
        '1.0.0',
        '1.1.0',
        '1.1.1',
        '1.1.11',
        '1.2.0',
        '1.3.0',
        '1.23.0',
        '2.0.0',
    ]
    alphabetical = sorted(expected)
    # random.sample returns a new list in random order. The ordering differs
    # from run to run, as it did with the original random sort key.
    shuffled = random.sample(expected, len(expected))
    assert expected == sorted(alphabetical, key=parse_version)
    assert expected == sorted(shuffled, key=parse_version)

Note that creating random ordering from the expected version list makes this a potentially unstable unit test, since the two runs will not have the same data. However, in this case, it should not matter ... I hope so.

+2
source

I use this:

 #!/usr/bin/python
import functools
import itertools
import re

# Boundaries between a digit and a letter, where a "." is inserted so that
# "1.7.0rc0" is split the same way as "1.7.0.rc.0".
_DIGIT_THEN_LETTER = re.compile(r'([0-9])([a-z])')
_LETTER_THEN_DIGIT = re.compile(r'([a-z])([0-9])')


def sort_software_versions(versions=(), reverse=False):
    """Return the version strings in *versions* sorted in version order.

    Each version is lower-cased, "-" is treated like ".", and a "." is
    inserted between adjacent digits and letters. Numeric components compare
    numerically and textual components compare lexically. A number always
    ranks above text, so "1.7.0.rc0" sorts before "1.7.0". Missing trailing
    components count as 0, so "1.7" and "1.7.0" compare equal.

    versions: iterable of version strings; it is not modified.
    reverse: when true, return the result in descending order.
    """

    def to_component(text):
        # Numeric parts become ints; anything else stays a string.
        try:
            return int(text)
        except ValueError:
            return text

    def split_version(version):
        normalized = version.lower().replace('-', '.')
        normalized = _DIGIT_THEN_LETTER.sub(r'\1.\2', normalized)
        normalized = _LETTER_THEN_DIGIT.sub(r'\1.\2', normalized)
        return [to_component(part) for part in normalized.split('.')]

    def compare_components(left, right):
        left_is_int = isinstance(left, int)
        right_is_int = isinstance(right, int)
        if left_is_int != right_is_int:
            # A number outranks text: this is what puts "rc" before a release.
            return 1 if left_is_int else -1
        return (left > right) - (left < right)

    def compare_version_lists(first, second):
        # zip_longest pads the shorter list with zeros without mutating it.
        pairs = itertools.zip_longest(first, second, fillvalue=0)
        for left, right in pairs:
            outcome = compare_components(left, right)
            if outcome:
                return outcome
        return 0

    as_key = functools.cmp_to_key(compare_version_lists)
    return sorted(versions, key=lambda v: as_key(split_version(v)),
                  reverse=reverse)

 print(sort_software_versions(['1.7.0', '1.7.0.rc0', '1.8.0'])) ['1.7.0.rc0', '1.7.0', '1.8.0'] 

Thus, it correctly handles alpha, beta, and rc versions. It can deal with hyphenated versions, or with versions where people attach "rc" directly to the number. The re.sub calls could use precompiled regexes, but this works fine.

+1
source

I found this useful and a little easier:

 # Example: sort version strings with the `packaging` library.
from packaging import version

vers = ["1.7.0", "1.7.0rc2", "1.7.0rc1", "1.7.1", "1.11.0"]
# Each string is parsed into a version.Version, which serves as the sort key.
sorted(vers, key=lambda x: version.Version(x))

That leads to:

 ['1.7.0rc1', '1.7.0rc2', '1.7.0', '1.7.1', '1.11.0'] 

Adding reverse=True puts them in “descending” order, which I find useful.

 ['1.11.0', '1.7.1', '1.7.0', '1.7.0rc2', '1.7.0rc1'] 

It can sort quite a large number of version-style numbers (my testbed was Linux v4.11.16, etc.)

+1
source

Source: https://habr.com/ru/post/924483/


All Articles