The first common item from two lists

x = [8,2,3,4,5]
y = [6,3,7,2,1]

How to find the first common element in two lists (in this case "2") in a concise and elegant way? Any list can be empty or there can be no common elements - in this case there will be nothing.

I need this to show python to someone who is not familiar with it, the simpler the better.

UPD: order is not important for my purposes, but suppose I'm looking for the first element in x, which also occurs in y.

+8
source share
10 answers

It should be straightforward and almost as efficient as it gets (for a more efficient solution check Ashwini Chaudhary's answer, and for the most efficient one check jamylak's answer and the comments there):

 result = None # Go trough one array for i in x: # The element repeats in the other list... if i in y: # Store the result and break the loop result = i break 

Or, more elegantly, you can encapsulate the same functionality in a function, following PEP 8 coding-style conventions:

 def get_first_common_element(x,y): ''' Fetches first element from x that is common for both lists or return None if no such an element is found. ''' for i in x: if i in y: return i # In case no common element found, you could trigger Exception # Or if no common element is _valid_ and common state of your application # you could simply return None and test return value # raise Exception('No common element found') return None 

And if you need all the common elements, you can do it just like this:

 >>> [i for i in x if i in y]
 [2, 3]
+9
source

Sorting is not the fastest way to do this: it can be done in O(N) time using a set (hash map).

 >>> x = [8,2,3,4,5]
 >>> y = [6,3,7,2,1]
 >>> set_y = set(y)
 >>> next((a for a in x if a in set_y), None)
 2

Or:

 next(ifilter(set(y).__contains__, x), None) 

This is what it does:

 >>> def foo(x, y):
 ...     seen = set(y)
 ...     for item in x:
 ...         if item in seen:
 ...             return item
 ...     else:
 ...         return None
 ...
 >>> foo(x, y)
 2

To show the time difference between different methods (naive approach, binary set search), here are some timings. I had to do this in order to refute an amazing number of people who thought binary search was faster ...:

 from itertools import ifilter from bisect import bisect_left a = [1, 2, 3, 9, 1, 1] * 100000 b = [44, 11, 23, 9, 10, 99] * 10000 c = [1, 7, 2, 4, 1, 9, 9, 2] * 1000000 # repeats early d = [7, 6, 11, 13, 19, 10, 19] * 1000000 e = range(50000) f = range(40000, 90000) # repeats in the middle g = [1] * 10000000 # no repeats at all h = [2] * 10000000 from random import randrange i = [randrange(10000000) for _ in xrange(5000000)] # some randoms j = [randrange(10000000) for _ in xrange(5000000)] def common_set(x, y, ifilter=ifilter, set=set, next=next): return next(ifilter(set(y).__contains__, x), None) pass def common_b_sort(x, y, bisect=bisect_left, sorted=sorted, min=min, len=len): sorted_y = sorted(y) for a in x: if a == sorted_y[min(bisect_left(sorted_y, a),len(sorted_y)-1)]: return a else: return None def common_naive(x, y): for a in x: for b in y: if a == b: return a else: return None from timeit import timeit from itertools import repeat import threading, thread print 'running tests - time limit of 20 seconds' for x, y in [('a', 'b'), ('c', 'd'), ('e', 'f'), ('g', 'h'), ('i', 'j')]: for func in ('common_set', 'common_b_sort', 'common_naive'): try: timer = threading.Timer(20, thread.interrupt_main) # 20 second time limit timer.start() res = timeit(stmt="print '[', {0}({1}, {2}), ".format(func, x, y), setup='from __main__ import common_set, common_b_sort, common_naive, {0}, {1}'.format(x, y), number=1) except: res = "Too long!!" finally: print '] Function: {0}, {1}, {2}. Time: {3}'.format(func, x, y, res) timer.cancel() 

The test data was:

 a = [1, 2, 3, 9, 1, 1] * 100000
 b = [44, 11, 23, 9, 10, 99] * 10000
 c = [1, 7, 2, 4, 1, 9, 9, 2] * 1000000  # repeats early
 d = [7, 6, 11, 13, 19, 10, 19] * 1000000
 e = range(50000)
 f = range(40000, 90000)  # repeats in the middle
 g = [1] * 10000000  # no repeats at all
 h = [2] * 10000000
 from random import randrange
 i = [randrange(10000000) for _ in xrange(5000000)]  # some randoms
 j = [randrange(10000000) for _ in xrange(5000000)]

Results:

 running tests - time limit of 20 seconds [ 9 ] Function: common_set, a, b. Time: 0.00569520707241 [ 9 ] Function: common_b_sort, a, b. Time: 0.0182240340602 [ 9 ] Function: common_naive, a, b. Time: 0.00978832505249 [ 7 ] Function: common_set, c, d. Time: 0.249175872911 [ 7 ] Function: common_b_sort, c, d. Time: 1.86735751332 [ 7 ] Function: common_naive, c, d. Time: 0.264309220865 [ 40000 ] Function: common_set, e, f. Time: 0.00966861710078 [ 40000 ] Function: common_b_sort, e, f. Time: 0.0505980508696 [ ] Function: common_naive, e, f. Time: Too long!! [ None ] Function: common_set, g, h. Time: 1.11300018578 [ None ] Function: common_b_sort, g, h. Time: 14.9472068377 [ ] Function: common_naive, g, h. Time: Too long!! [ 5411743 ] Function: common_set, i, j. Time: 1.88894859542 [ 5411743 ] Function: common_b_sort, i, j. Time: 6.28617268396 [ 5411743 ] Function: common_naive, i, j. Time: 1.11231867458 

This gives you an idea of how it will scale for large inputs: O(N) vs O(N log N) vs O(N^2)

+8
source

A one-liner, using next to get the first element from a generator:

 x = [8,2,3,4,5] y = [6,3,7,2,1] first = next((a for a in x if a in y), None) 

Or more efficiently, since set.__contains__ is faster than list.__contains__ :

 set_y = set(y) first = next((a for a in x if a in set_y), None) 

Or more efficiently, but still on the same line (do not do this):

 first = next((lambda set_y: a for a in x if a in set_y)(set(y)), None) 
+6
source

Using for loops with in will cause O(N^2) complexity, but you can sort y here and use binary search to improve the time complexity to O(N log N).

 def binary_search(lis,num): low=0 high=len(lis)-1 ret=-1 #return -1 if item is not found while low<=high: mid=(low+high)//2 if num<lis[mid]: high=mid-1 elif num>lis[mid]: low=mid+1 else: ret=mid break return ret x = [8,2,3,4,5] y = [6,3,7,2,1] y.sort() for z in x: ind=binary_search(y,z) if ind!=-1 print z break 

Output: 2

Using the bisect module to perform the same function as above:

 import bisect x = [8,2,3,4,5] y = [6,3,7,2,1] y.sort() for z in x: ind=bisect.bisect(y,z)-1 #or use `ind=min(bisect.bisect_left(y, z), len(y) - 1)` if ind!=-1 and y[ind] ==z: print z #prints 2 break 
+3
source

I assume that you want to teach this person Python, not just programming. So I'm not shy about using zip instead of ugly loop variables; this is a very useful part of Python and not hard to explain.

 def first_common(x, y): common = set(x) & set(y) for current_x, current_y in zip(x, y): if current_x in common: return current_x elif current_y in common: return current_y print first_common([8,2,3,4,5], [6,3,7,2,1]) 

If you really don't want to use zip , here's how to do it:

 def first_common2(x, y): common = set(x) & set(y) for i in xrange(min(len(x), len(y))): if x[i] in common: return x[i] elif y[i] in common: return y[i] 

And for those who are interested, so it applies to any number of sequences:

 def first_common3(*seqs): common = set.intersection(*[set(seq) for seq in seqs]) for current_elements in zip(*seqs): for element in current_elements: if element in common: return element 

Finally, note that unlike some other solutions, this also works if the first common item appears in the second list.

I just noticed your update, which makes an even simpler solution:

 def first_common4(x, y): ys = set(y) # We don't want this to be recreated for each element in x for element in x: if element in ys: return element 

The above is probably more readable than the generator-expression version.

Too bad there is no built-in ordered set. It would make for a more elegant solution.

+3
source

Using for loops seems the easiest way to explain to someone new.

 for number1 in x: for number2 in y: if number1 == number2: print number1, number2 print x.index(number1), y.index(number2) exit(0) print "No common numbers found." 

NB Not tested, just out of my head.

+1
source

This one uses sets built from growing slices of the lists. It returns the first common element or None if there is no common element.

 def findcommon(x,y): common = None for i in range(0,max(len(x),len(y))): common = set(x[0:i]).intersection(set(y[0:i])) if common: break return list(common)[0] if common else None 
+1
source
 def first_common_element(x,y): common = set(x).intersection(set(y)) if common: return x[min([x.index(i)for i in common])] 
+1
source

Just for fun (probably inefficient), another version using itertools :

 from itertools import dropwhile, product from operator import __ne__ def accept_pair(f): "Make a version of f that takes a pair instead of 2 arguments." def accepting_pair(pair): return f(*pair) return accepting_pair def get_first_common(x, y): try: # I think this *_ unpacking syntax works only in Python 3 ((first_common, _), *_) = dropwhile( accept_pair(__ne__), product(x, y)) except ValueError: return None return first_common x = [8, 2, 3, 4, 5] y = [6, 3, 7, 2, 1] print(get_first_common(x, y)) # 2 y = [6, 7, 1] print(get_first_common(x, y)) # None 

It’s easier, but not so fun, to use a lambda pair: pair[0] != pair[1] instead of accept_pair(__ne__) .

+1
source

Using set is a general solution for an arbitrary number of lists:

 def first_common(*lsts): common = reduce(lambda c, l: c & set(l), lsts[1:], set(lsts[0])) if not common: return None firsts = [min(lst.index(el) for el in common) for lst in lsts] index_in_list = min(firsts) trgt_lst_index = firsts.index(index_in_list) return lsts[trgt_lst_index][index_in_list] 

Along the same lines — a less elegant solution, but one that reduces excess overhead:

 def first_common(*lsts): common = reduce(lambda c, l: c & set(l), lsts[1:], set(lsts[0])) if not common: return None for lsts_slice in itertools.izip_longest(*lsts): slice_intersection = common.intersection(lsts_slice) if slice_intersection: return slice_intersection.pop() 
0
source

All Articles