Extract numbers from a chemical formula

Sorry if this has already been asked and answered, but I could not find a satisfactory answer.

I have a list of chemical formulas whose elements always appear in this order: C, H, N and O. I would like to extract the number that follows each of these letters. The problem is that not all formulas contain N, although all of them contain C, H and O. The number can have one or two digits (or, only in the case of H, three digits).

So the data looks like this:

  • C20H37N1O5
  • C10H12O3
  • C20H19N3O4
  • C23H40O3
  • C9H13N1O3
  • C14H26O4
  • C58H100N2O9

I need the number for each element in a separate column. So, for the first example, the result would be:

20 37 1 5

I've tried:

=IFERROR(MID(LEFT(A2,FIND("H",A2)-1),FIND("C",A2)+1,LEN(A2)),"") 

to extract the number for C. However, after that I get stuck, because the number for H is followed by either N or O.

Is there an Excel formula or a VBA function that can do this?

+6
6

(regex). VBA , Windows.

  • , enter image description here

  • Microsoft VBScript Regular Expression 5.5 enter image description here

  • Option Explicit 
    
    ' Returns the number of atoms of Element found in ChemFormula.
    '
    ' ChemFormula : formula text such as "C20H37N1O5" or "CH3OH".
    ' Element     : case-sensitive element symbol to count, e.g. "C" or "Cl".
    '
    ' Every occurrence of the element is added up, so "CH3OH" yields 4 for "H".
    ' A symbol that is not followed by digits counts as 1 (C and O in "CH4O").
    ' Returns 0 when the element does not occur in the formula.
    '
    ' Requires a project reference to "Microsoft VBScript Regular Expressions 5.5"
    ' (early-bound RegExp / MatchCollection / Match types).
    Public Function ChemRegex(ChemFormula As String, Element As String) As Long
        ' One uppercase letter plus an optional lowercase letter, followed by
        ' optional digits. Tokenising two-letter symbols as a unit prevents
        ' "Cl" or "Ca" from being mistaken for carbon.
        ' The pattern does not validate that a token is a real element symbol.
        Const ELEMENT_PATTERN As String = "([A-Z][a-z]?)([0-9]*)"

        Dim regEx As New RegExp
        Dim Matches As MatchCollection, m As Match

        With regEx
            .Global = True          ' visit every token, not only the first
            .IgnoreCase = False     ' "Co" (cobalt) must stay distinct from "CO"
            .Pattern = ELEMENT_PATTERN
        End With

        Set Matches = regEx.Execute(ChemFormula)
        For Each m In Matches
            If m.SubMatches(0) = Element Then
                If Len(m.SubMatches(1)) = 0 Then
                    ' No explicit count after the symbol means a single atom.
                    ChemRegex = ChemRegex + 1
                Else
                    ChemRegex = ChemRegex + CLng(m.SubMatches(1))
                End If
                ' No Exit For here: later occurrences of the same element
                ' must be included in the total.
            End If
        Next m
    End Function
    
  • . B2: =ChemRegex($A2,B$1) enter image description here


, CH3OH CH2COOH

, - CH3OH, . H3 , .

, CH3OH CH2COOH ( ), , ...

' Variant of the matching branch used inside the For Each loop of ChemRegex:
' the count is added to the running total instead of being assigned, so an
' element that occurs several times in the formula is summed over all matches.
If m.SubMatches(0) = Element Then
    ' An empty digit group (symbol without a number) contributes 1.
    ChemRegex = ChemRegex + IIf(Not m.SubMatches(1) = vbNullString, m.SubMatches(1), 1)
    'Exit For must be removed so that the loop continues through the remaining matches.
End If

enter image description here

, NaOH CaCl2

:

strPattern = "([A-Z][a-z]?)([0-9]*)"   'https://regex101.com/r/nNv8W6/2

enter image description here

  • , / . CaCl2 , CaCl2 CaCl2.
  • , , . , , . Xx2Zz5Q Xx = 2, Zz = 5 Q = 1.

    , , :

    strPattern = "([A][cglmrstu]|[B][aehikr]?|[C][adeflmnorsu]?|[D][bsy]|[E][rsu]|[F][elmr]?|[G][ade]|[H][efgos]?|[I][nr]?|[K][r]?|[L][airuv]|[M][cdgnot]|[N][abdehiop]?|[O][gs]?|[P][abdmortu]?|[R][abefghnu]|[S][bcegimnr]?|[T][abcehilms]|[U]|[V]|[W]|[X][e]|[Y][b]?|[Z][nr])([0-9]*)"
    'https://regex101.com/r/Hlzta2/3
    'This pattern includes all 118 elements up to today. 
    'If new elements are found/generated by scientist they need to be added to the pattern.
    
+9

, :

enter image description here

B2 .

=IFERROR(IFERROR(--(MID($A2,SEARCH(B$1,$A2)+1,3)),IFERROR(--(MID($A2,SEARCH(B$1,$A2)+1,2)),--MID($A2,SEARCH(B$1,$A2)+1,1))),0)

, ctrl + shift + enter

=MAX(IFERROR(--MID($A2,SEARCH(B$1,$A2)+1,ROW($A$1:$A$3)),0))

, VBA , - :

' Returns the number that directly follows the first occurrence of `element`
' in `str`, e.g. ElementCount("C58H100N2O9", "H") = 100.
'
' str     : formula text to search.
' element : element symbol to look for (one or more characters).
'
' Returns 0 when `element` is empty, is not present in `str`, or is not
' immediately followed by a digit.
'
' Only literal digits 0-9 are consumed, so text such as "2D2" or "1E5" is never
' interpreted as a number in exponent notation, and no error trapping is needed.
Public Function ElementCount(str As String, element As String) As Long
    Dim symbolPos As Long
    Dim idx As Long
    Dim digits As String
    Dim ch As String

    If Len(element) = 0 Then Exit Function

    symbolPos = InStr(str, element)
    If symbolPos = 0 Then Exit Function     ' element not present -> 0

    ' Start right after the whole symbol so multi-letter symbols work too.
    idx = symbolPos + Len(element)
    Do While idx <= Len(str)
        ch = Mid$(str, idx, 1)
        If Not (ch Like "[0-9]") Then Exit Do
        digits = digits & ch
        idx = idx + 1
    Loop

    If Len(digits) > 0 Then ElementCount = CLng(digits)
End Function

:

=ElementCount(A1,"C")
+3

VBA, . , , Vityata, , , .

Option Explicit

' Worksheet function: returns the digits that follow `element` in the formula
' held in the single cell `chemical_formula`.
'
' chemical_formula : exactly one cell containing the formula text.
' element          : a single letter; matching is case-insensitive.
'
' Returns:
'   the matched digits as text (first occurrence of the element),
'   #NAME? if `element` is not exactly one letter or the range is not one cell,
'   #N/A   if the cell holds an error value or no "<element><digits>" is found.
Function find_associated_number(chemical_formula As Range, element As String) As Variant
  Dim regex As Object
  Dim matches As Object
  Dim cell_value As Variant

  ' A one-character Like pattern also rejects empty and multi-character input,
  ' and keeps regular-expression metacharacters out of the pattern.
  If Not (element Like "[A-Za-z]") Or chemical_formula.CountLarge <> 1 Then
    find_associated_number = CVErr(xlErrName)
    Exit Function
  End If

  cell_value = chemical_formula.Value
  If IsError(cell_value) Then
    find_associated_number = CVErr(xlErrNA)
    Exit Function
  End If

  Set regex = CreateObject("VBScript.RegExp")
  With regex
    ' \d+ is greedy, so it already captures the whole number. No trailing \D
    ' is required; demanding one would make the last element of a formula
    ' (e.g. "O5" at the end of "C20H37N1O5") impossible to match.
    .Pattern = element & "(\d+)"
    .IgnoreCase = True
    Set matches = .Execute(CStr(cell_value))
  End With

  If matches.Count > 0 Then
    find_associated_number = matches(0).SubMatches(0)
  Else
    find_associated_number = CVErr(xlErrNA)
  End If
End Function

, :

enter image description here

C , D - . , , .

+2

VBA - . Excel . . ,

C20H37NO5 20375, :

=IF(ISNUMBER(1*MID(A1,1,1)),MID(A1,1,1),"")&
IF(ISNUMBER(1*MID(A1,2,1)),MID(A1,2,1),"")&
IF(ISNUMBER(1*MID(A1,3,1)),MID(A1,3,1),"")&
IF(ISNUMBER(1*MID(A1,4,1)),MID(A1,4,1),"")&
IF(ISNUMBER(1*MID(A1,5,1)),MID(A1,5,1),"")&
IF(ISNUMBER(1*MID(A1,6,1)),MID(A1,6,1),"")&
IF(ISNUMBER(1*MID(A1,7,1)),MID(A1,7,1),"")&
IF(ISNUMBER(1*MID(A1,8,1)),MID(A1,8,1),"")&
IF(ISNUMBER(1*MID(A1,9,1)),MID(A1,9,1),"")

9 . 9, .

- 1*. , . , a 5 , 1, .

+1

.

' Splits the formulas in column A (from A2 down to the last used cell) of the
' active sheet into their numbers and writes them next to each formula,
' starting in column B.
'
' Every uppercase letter A-Z is replaced by a space and the text is then split
' on spaces; tokens 1..n of that split are written to columns B, C, D, ...
' The token at index 0 (the text before the first space) is not written.
'
' NOTE: the output is positional. A formula that lacks an element (for example
' no N) places the following numbers one column further left than a formula
' that has it.
Sub test()
    Dim vDB As Variant, vR() As Variant
    Dim s As String
    Dim vSplit As Variant
    Dim i As Long, n As Long, j As Long
    Dim lastRow As Long, nCols As Long

    lastRow = Range("a" & Rows.Count).End(xlUp).Row
    If lastRow < 2 Then Exit Sub            ' nothing below the header row

    If lastRow = 2 Then
        ' A single cell returns a scalar from .Value, not a 2-D array.
        ReDim vDB(1 To 1, 1 To 1)
        vDB(1, 1) = Range("a2").Value
    Else
        vDB = Range("a2", Range("a" & lastRow)).Value
    End If

    n = UBound(vDB, 1)
    nCols = 4
    ReDim vR(1 To n, 1 To nCols)

    For i = 1 To n
        If IsError(vDB(i, 1)) Then
            s = vbNullString                ' error cells produce an empty row
        Else
            s = CStr(vDB(i, 1))
        End If

        ' Blank out each uppercase letter in place.
        For j = 1 To Len(s)
            If Mid$(s, j, 1) Like "[A-Z]" Then Mid$(s, j, 1) = " "
        Next j

        vSplit = Split(s, " ")

        ' Grow the output when a formula yields more tokens than columns so far.
        If UBound(vSplit) > nCols Then
            nCols = UBound(vSplit)
            ReDim Preserve vR(1 To n, 1 To nCols)
        End If

        For j = 1 To UBound(vSplit)
            vR(i, j) = vSplit(j)
        Next j
    Next i

    Range("b2").Resize(n, nCols).Value = vR
End Sub
+1

, vba , .

' For every cell in column A of the active sheet (row 1 down to the last used
' row), finds each run of digits in the cell text and writes the runs, in order
' of appearance, as numbers into the cells to its right (column B, C, ...).
'
' Cells holding an error value are skipped. Output is positional: the n-th
' number found goes into the n-th column to the right of the formula.
Sub GetMolecularFormulaNumbers()
    Dim rng As Range
    Dim c As Range                          ' declared so the Sub compiles under Option Explicit
    Dim RegExp As Object
    Dim match As Object, matches As Object
    Dim j As Long

    Set rng = Range(Cells(1, 1), Cells(Cells(Rows.Count, 1).End(xlUp).Row, 1))
    Set RegExp = CreateObject("vbscript.regexp")
    With RegExp
        .Pattern = "\d+"
        .Global = True

        For Each c In rng
            If Not IsError(c.Value) Then
                j = 0
                Set matches = .Execute(CStr(c.Value))
                For Each match In matches
                    j = j + 1
                    ' CDbl instead of CInt: CInt overflows above 32767.
                    c.Offset(0, j).Value = CDbl(match.Value)
                Next match
            End If
        Next c
    End With
End Sub
+1

All Articles