Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 167 lines (137 sloc) 5 KB
#!/usr/bin/python
from __future__ import print_function
#-----------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------
import sys
import getopt
#-----------------------------------------------------------------------
# Defaults
#-----------------------------------------------------------------------
# Default string used to join the values of a multi-valued field on output
# when no -s option is given.
DEF_SEPARATOR = '/'
#-----------------------------------------------------------------------
# Functions
#-----------------------------------------------------------------------
def Usage(msg=None):
    """Print the command-line help text and terminate the program.

    The help text is printed to standard output with DEF_SEPARATOR
    substituted in as the documented default separator.  When `msg` is
    given and non-empty it is printed after the help text, surrounded by
    blank lines.

    Args:
        msg: Optional extra message to show after the help text
            (for example an error description).

    Raises:
        SystemExit: Always; this function never returns normally.
    """
    print("""
cmerge [-d] [-s SEP] [-N KEY[/KEY/...]] KEY FILE1 FILE2 [FILE3 ...]
Read clauses from FILE1, FILE2, etc. Match clauses
between files according to the value field for KEY,
and merge all their fields.
Print results to standard out
OPTION
-s SEP Character used to separate value in multi-valued fields. Default value is '%s'.
-N KEYSTR N can range from 1 to 9. KEYSTR contains list of
key values to read from file 1, or file 2, etc, up to file 9.
Values are separated by either a '/' or ','. Both can be
used in the same KEYSTR.
-d Multivalued fields within a record (that is, fields with
identical keys) are listed separately. By default they are
joined together using a separator.
""" % DEF_SEPARATOR)
    if msg:
        print()
        print(msg)
        print()
    raise SystemExit
# Read clauses and file cnt
def read_clauses(files,lower_case=False):
    """Yield the clauses found in each of `files`, in order.

    A clause is a run of consecutive lines terminated by a blank line or
    by the end of the file.  Every line is stripped of surrounding
    whitespace before being stored.

    Note that the blank line which terminates a clause is itself stored
    as the first entry of the *next* clause, so clauses after the first
    one in a file normally start with an entry whose text is ''.
    Consecutive blank lines therefore produce clauses consisting of a
    single '' entry.

    Args:
        files: Iterable of file names.  The name "-" means standard input.
        lower_case: If true, each stored line is converted to lower case.

    Yields:
        (filecnt, clause) tuples, where `filecnt` is the zero-based index
        of the file in `files` and `clause` is a list of
        (line_no, text) tuples.  `line_no` counts lines cumulatively
        across all files; it is not reset for each file.
    """
    line_no = 0
    for filecnt, fname in enumerate(files):
        clause = []
        from_stdin = fname == "-"
        fin = sys.stdin if from_stdin else open(fname, "r")
        try:
            # Iterate lazily instead of loading the whole file at once.
            for line in fin:
                line_no += 1
                line = line.strip()
                # A blank line ends the clause collected so far.
                if not line and clause:
                    yield filecnt, clause
                    clause = []
                # The line (even a blank one) is always recorded.
                clause.append((line_no, line.lower() if lower_case else line))
        finally:
            # Close the file even if the consumer stops iterating early
            # or an error occurs; standard input is left open.
            if not from_stdin:
                fin.close()
        # Whatever remains is the last clause of this file.
        if clause:
            yield filecnt, clause
def get_clause_key_value(clause, keystr):
    """Return the value of the first line in `clause` that starts with `keystr:`.

    Args:
        clause: List of (line_no, text) tuples.
        keystr: Field name to look for, without the trailing colon.

    Returns:
        The text following "keystr:" on the first matching line, stripped
        of surrounding whitespace, or None when no line starts with that
        prefix.
    """
    prefix = keystr + ':'
    for _, text in clause:
        if text.startswith(prefix):
            return text[len(prefix):].strip()
    # No line carries the requested key.
    return None
# Read clause, return all key,values in pairs[], and
# return value associated with the key 'keystr'
def get_split_clause(keystr,clause):
    """Split the lines of `clause` into (key, value) pairs.

    Each line containing a ':' is split at the first colon; both halves
    are stripped of surrounding whitespace.  Lines without a colon
    (including empty lines) are ignored.

    Args:
        keystr: Name of the key whose value should be reported separately.
        clause: List of (line_no, text) tuples.

    Returns:
        A (key_value, pairs) tuple.  `pairs` is the list of (key, value)
        tuples in clause order.  `key_value` is the value of the last
        pair whose key equals `keystr`, or None if there is no such pair.
    """
    pairs = []
    key_value = None
    for _, fieldstr in clause:
        # An empty line has no ':' either, so one test covers both cases.
        if ':' not in fieldstr:
            continue
        key, _, value = fieldstr.partition(":")
        key, value = key.strip(), value.strip()
        pairs.append((key, value))
        if key == keystr:
            key_value = value
    return key_value, pairs
def split_keystr(keystr):
    """Split a key list string into a list of lower-cased key names.

    Keys may be delimited by '/' or ',' (both may be mixed in one string).
    Whitespace around each key is removed and empty entries, such as
    those produced by leading, trailing or doubled delimiters, are
    dropped.

    Args:
        keystr: The raw key list, e.g. "Name/Age,City".

    Returns:
        List of lower-cased, stripped, non-empty key names in their
        original order.
    """
    # Treat both delimiters alike by normalising '/' to ','.
    candidates = keystr.lower().replace("/", ",").split(",")
    stripped = (key.strip() for key in candidates)
    return [key for key in stripped if key]
#-----------------------------------------------------------------------
# Main
#-----------------------------------------------------------------------
def main():
    """Merge clauses from several files on a shared key and print the result.

    Command-line arguments are read from sys.argv: options, then the
    primary key name, then at least two file names.  Clauses from all
    files that carry the same value for the primary key are merged;
    each distinct value of a field is kept once, in first-seen order.
    The merged clauses are printed to standard output sorted by primary
    key value, with the fields of each clause sorted by name.

    Options handled here:
        -s SEP   separator used to join multiple values of one field
        -d       print each value of a multi-valued field on its own line
        -1 .. -9 restrict the fields taken from file 1 .. file 9 to the
                 given key list (matched case-insensitively)

    Raises:
        SystemExit: Via Usage() when the options cannot be parsed or
            fewer than three positional arguments are given.
    """
    # Read options
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'ds:1:2:3:4:5:6:7:8:9:N:')
    except getopt.GetoptError as err:
        # Show the help text instead of a traceback on a bad option.
        Usage(str(err))
    if len(args) < 3:
        Usage()
    opts = dict(opts)
    keystr, fnames = args[0], args[1:]
    separator = opts.get('-s', DEF_SEPARATOR)
    list_separately = '-d' in opts

    # Per-file field filters, keyed by zero-based file index.  A dict is
    # used so that files beyond the ninth simply have no filter instead
    # of indexing past the end of a fixed-size list.
    file_fields = {}
    for n in range(1, 10):
        optkey = "-%1d" % n
        if optkey in opts:
            file_fields[n - 1] = split_keystr(opts[optkey])

    merged_clauses = {}
    for filecnt, clause in read_clauses(fnames):
        # A clause holding only an empty line just marks the end of the
        # previous clause.
        if len(clause) == 1 and clause[0][1] == '':
            continue
        # key_value is the value associated with the primary key.
        key_value, pairs = get_split_clause(keystr, clause)
        # Only clauses carrying a non-empty primary key value are merged
        # into merged_clauses.
        if not key_value:
            continue
        merged = merged_clauses.setdefault(key_value, {})
        wanted = file_fields.get(filecnt)
        for k, v in pairs:
            # Apply the per-file key filter (-1, -2, etc. options), if any.
            if wanted is not None and k.lower() not in wanted:
                continue
            values = merged.setdefault(k, [])
            if v not in values:
                values.append(v)

    # Print merged clauses
    for key_value in sorted(merged_clauses):
        print()
        print("%s: %s" % (keystr, key_value))
        for k, values in sorted(merged_clauses[key_value].items()):
            # The primary key line has already been printed above.
            if k == keystr:
                continue
            if list_separately:
                for value in values:
                    print("%s: %s" % (k, value))
            else:
                print("%s: %s" % (k, separator.join(values)))
# Script entry point: main() is called unconditionally when this file is
# executed (there is no __name__ guard, so it also runs on import).
main()