Combine multiple values into one record without duplicates

I have a dbf table, shown below, that is the result of joining two tables. For each taxlot identifier (tid) I want a single record that lists its unique zone values.

table name: entry table
tid ----- zone
1 ------ A
1 ------ A
1 ------ B
1 ------ C
2 ------ D
2 ------ E
3 ------ C

Desired output table
table name: output table
tid ----- zone
1 ------ A, B, C
2 ------ D, E
3 ------ C

I got some help but couldn't get it to work.

# Collapse the joined table into one output row per taxlot (TID) whose ZONES
# field holds that taxlot's distinct zones as a comma-separated string.
# NOTE: `gp` is not defined in this snippet; it must already exist before
# this code runs.
inputTbl = r"C:\temp\input.dbf"
taxIdZoningDict = {}
searchRows = gp.searchcursor(inputTbl)
searchRow = searchRows.next()
while searchRow:
    # setdefault creates the set the first time a TID is seen; the set itself
    # discards repeated zones.
    taxIdZoningDict.setdefault(searchRow.TID, set()).add(searchRow.ZONE)
    searchRow = searchRows.next()

outputTbl = r"C:\temp\output.dbf"
gp.CreateTable_management(r"C:\temp", "output.dbf")
gp.AddField_management(outputTbl, "TID", "LONG")
# NOTE(review): the trailing "20" is presumably the text field length; a
# longer zone list would not fit -- confirm and widen if needed.
gp.AddField_management(outputTbl, "ZONES", "TEXT", "", "", "20")
insertRows = gp.insertcursor(outputTbl)
insertRow = None  # keeps the `del` below valid when there are no rows
for tid in sorted(taxIdZoningDict):  # ascending TID order
    insertRow = insertRows.newrow()
    insertRow.TID = tid
    # Sorting the zones makes the output deterministic (set order is not).
    insertRow.ZONES = ",".join(sorted(taxIdZoningDict[tid]))
    insertRows.insertrow(insertRow)
del insertRow
del insertRows
+5
source share
6

Using the dbf module and defaultdict, this becomes much simpler:

import dbf
from collections import defaultdict

# Group the distinct zone values of the input table by taxlot id.
inputTbl = dbf.Table(r'c:\temp\input.dbf')
taxIdZoning = defaultdict(set)  # tid -> set of zones; the set drops duplicates

for record in inputTbl:
    taxIdZoning[record.tid].add(record.zone)
inputTbl.close()

# Write one record per tid, with its zones sorted and comma-joined.
# NOTE(review): the field-spec string and the append()/attribute-assignment
# calls are kept exactly as written; confirm them against the installed
# version of the dbf package.
outputTbl = dbf.Table(r'c:\temp\output.dbf', 'tid N(17.0), zones C(20)')
for tid in sorted(taxIdZoning):
    record = outputTbl.append()
    record.tid = tid
    record.zones = ','.join(sorted(taxIdZoning[tid]))
outputTbl.close()

: , , LONG, , , 17 .:) - .

+3

, Microsoft Access VBA, Microsoft Excel VBA. , . Access, Excel.

Set the sDBF* and sOutDBF* variables to match your input and output files.

Sub VBASolution()
    ' Collapses an input DBF of (tid, zone) rows into an output DBF holding one
    ' row per tid whose zone column is the comma-separated list of that tid's
    ' distinct zones.
    Dim oRS
    Dim sConn
    Dim sDBFPath, sOutDBFPath
    Dim sDBFName, sOutDBFName
    Dim sOutFile
    Dim oDict
    Dim curTID, curZone, sZones
    Dim oConn
    Dim oFS
    Dim sTableName
    Dim i, j

    sDBFPath = "C:\Path\To\DBFs\"
    sDBFName = "Input.dbf"

    sOutDBFPath = "C:\Path\To\DBFs\"
    sOutDBFName = "RESULTS.dbf"

    ' The folder that holds the DBF files is appended to this string as Dbq.
    sConn = "Driver={Microsoft dBASE Driver (*.dbf)}; DriverID=277; Dbq="

    Set oRS = CreateObject("ADODB.Recordset")
    oRS.Open "SELECT DISTINCT tid, zone FROM " & sDBFName, sConn & sDBFPath

    ' oDict maps each tid to an inner dictionary whose keys are its zones.
    Set oDict = CreateObject("Scripting.Dictionary")

    Do While Not oRS.EOF
        curTID = oRS.Fields("tid").Value
        curZone = oRS.Fields("zone").Value
        If Not oDict.Exists(curTID) Then
            oDict.Add curTID, CreateObject("Scripting.Dictionary")
        End If
        If Not oDict(curTID).Exists(curZone) Then
            oDict(curTID).Add curZone, curZone
        End If
        oRS.MoveNext
    Loop
    oRS.Close

    Set oRS = Nothing

    Set oConn = CreateObject("ADODB.Connection")
    oConn.Open sConn & sOutDBFPath

    'Delete the resultant DBF file if it already exists.
    Set oFS = CreateObject("Scripting.FileSystemObject")
    ' BuildPath inserts a separator only when one is missing, so the folder
    ' may be given with or without a trailing backslash.
    sOutFile = oFS.BuildPath(sOutDBFPath, sOutDBFName)
    If oFS.FileExists(sOutFile) Then
        oFS.DeleteFile sOutFile
    End If

    sTableName = oFS.GetBaseName(sOutDBFName)

    oConn.Execute "CREATE TABLE " & sTableName & " (tid int, zone varchar(80))"

    For Each i In oDict.Keys
        curTID = i
        sZones = ""
        For Each j In oDict(i)
            sZones = sZones & "," & j
        Next
        ' Drop the leading comma added by the loop above.
        sZones = Mid(sZones, 2)
        ' Double any single quotes so a value containing ' cannot break (or
        ' inject into) the SQL statement.
        oConn.Execute "INSERT INTO " & sTableName & " (tid, zone) VALUES ('" & _
            Replace(curTID & "", "'", "''") & "','" & _
            Replace(sZones, "'", "''") & "')"
    Next
    oConn.Close

    Set oConn = Nothing
    Set oDict = Nothing
    Set oFS = Nothing
End Sub

EDIT: , , VBScript.VBS() Windows XP :

Call VBASolution()

, Office dbf Windows.

+2

Use defaultdict(set) to drop duplicates; if you want to keep them, use defaultdict(list) instead and replace .add() with .append().

+1

:

# Build a mapping of TID -> set of ZONE values by walking the search cursor
# one row at a time (`gp` and `inputTbl` come from outside this snippet).
taxIdZoningDict = {}
searchRows = gp.searchcursor(inputTbl)
searchRow = searchRows.next()
while searchRow:
   if searchRow.TID in taxIdZoningDict:
      taxIdZoningDict[searchRow.TID].add(searchRow.ZONE)
   else:
      taxIdZoningDict[searchRow.TID] = set() # a set prevents duplicates!
      taxIdZoningDict[searchRow.TID].add(searchRow.ZONE)
   searchRow = searchRows.next()

:

# Map each TID to the set of ZONE values seen for it; setdefault returns the
# existing set (or stores and returns a new one), so the add can be chained.
zones = {}
for row in gp.searchcursor(inputTbl):
    zones.setdefault(row.TID, set()).add(row.ZONE)

pythonic, ; -)

:

# Print each TID followed by its zones. Iterating a dict directly yields only
# its keys, so .items() is required to get (key, value) pairs.
for k, v in zones.items():
    # Parenthesised %-formatting prints the same text under Python 2 and 3.
    print("%s %s" % (k, ", ".join(v)))
+1

Python, , .

import collections

# Group zone values by taxlot id from a whitespace-separated text file whose
# data lines look like "1 ------ A" (id, separator, zone), then print one line
# per id with its distinct zones in sorted order.
d = collections.defaultdict(set)  # id -> distinct zones

with open("input_file.txt") as f:
    for line in f:
        parsed = line.split()
        if len(parsed) < 3:
            # Blank or short lines have no zone column; skip them instead of
            # failing with IndexError.
            continue
        k = parsed[0]
        v = parsed[2]
        d[k].add(v)

for k, v in sorted(d.items()):
    s = " ----- "
    v = sorted(v)
    # Space-joined: id, separator, then each zone, all on one line.
    print(" ".join([k, s] + v))

,

0

OP . , , , , , v:

    # One statement prints the key, the separator and the comma-joined items of v.
    print k, s, ",".join(v)

This does more on one line (which is possibly a negative). Using `join` in this way is fairly common in Python, and IMHO expresses intent more clearly (and is easier to digest when reading) than an explicit loop.

0
source

All Articles