Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
ctools/cmerge.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
167 lines (137 sloc)
5 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from __future__ import print_function | |
#----------------------------------------------------------------------- | |
# Imports | |
#----------------------------------------------------------------------- | |
import sys | |
import getopt | |
#----------------------------------------------------------------------- | |
# Defaults | |
#----------------------------------------------------------------------- | |
# Default string used to join the values of a multi-valued field on output;
# overridden on the command line with -s SEP.
DEF_SEPARATOR = '/'
#----------------------------------------------------------------------- | |
# Functions | |
#----------------------------------------------------------------------- | |
def Usage(msg=None):
    """Print the command-line help, then terminate the program.

    msg -- optional error message printed after the help text.

    Always raises SystemExit.  The exit status is 1 when an error
    message was supplied and 0 (plain help request) otherwise.
    """
    print("""
cmerge [-d] [-s SEP] [-N KEY[/KEY/...]] KEY FILE1 FILE2 [FILE3 ...]

    Read clauses from FILE1, FILE2, etc. Match clauses
    between files according to the value field for KEY,
    and merge all their fields.

    Print results to standard out

OPTION
    -s SEP      Character used to separate value in multi-valued fields. Default value is '%s'.
    -N KEYSTR   N can range from 1 to 9. KEYSTR contains list of
                key values to read from file 1, or file 2, etc, up to file 9.
                Values are separated by either a '/' or ','. Both can be
                used in the same KEYSTR.
    -d          Multivalued fields within a record (that is, fields with
                identical keys) are listed separately. By default they are
                joined together using a separator.
""" % DEF_SEPARATOR)
    if msg:
        print()
        print(msg)
        print()
        # An explicit message means something went wrong: report failure
        # to the calling shell instead of exiting with status 0.
        raise SystemExit(1)
    raise SystemExit
def read_clauses(files, lower_case=False):
    """Yield (file index, clause) for every clause found in *files*.

    files      -- sequence of file names; the name "-" means standard input.
    lower_case -- when true, every stored line is lower-cased.

    A clause is a run of consecutive non-blank lines; blank lines only
    separate clauses and are never stored.  Each clause is a list of
    (line_no, text) tuples where text is the stripped line.  line_no is a
    running count that is NOT reset between files.  The file index is the
    0-based position of the file in *files*.

    Files other than standard input are closed as soon as they are
    exhausted, and also when the generator is abandoned or an error occurs.
    """
    line_no = 0
    for filecnt, fname in enumerate(files):
        use_stdin = fname == "-"
        fin = sys.stdin if use_stdin else open(fname, "r")
        clause = []
        try:
            # Iterate lazily instead of loading the whole file up front.
            for line in fin:
                line_no += 1
                line = line.strip()
                if not line:
                    # Blank line: end of the current clause (if any).
                    if clause:
                        yield filecnt, clause
                        clause = []
                    continue
                if lower_case:
                    line = line.lower()
                clause.append((line_no, line))
        finally:
            # Never close the caller's standard input.
            if not use_stdin:
                fin.close()
        # Last clause of the file when it does not end with a blank line.
        if clause:
            yield filecnt, clause
def get_clause_key_value(clause, keystr):
    """Return the value of the first line in *clause* that starts with
    "<keystr>:", stripped of surrounding whitespace, or None when no line
    starts with that prefix.

    clause -- iterable of (line number, text) pairs.
    keystr -- field name to look for (the ':' is appended here).
    """
    prefix = keystr + ':'
    skip = len(prefix)
    matches = (text[skip:].strip()
               for _, text in clause
               if text.startswith(prefix))
    # First hit wins; fall back to None if the generator is empty.
    return next(matches, None)
def get_split_clause(keystr, clause):
    """Split the lines of *clause* into (key, value) pairs.

    keystr -- name of the field whose value should be reported.
    clause -- iterable of (line number, text) pairs.

    Returns (key_value, pairs): pairs lists every "key: value" line in
    order, split at the first ':' with both sides stripped; lines without
    a ':' are ignored.  key_value is the value of the last pair whose key
    equals *keystr* exactly, or None if there is none.
    """
    found = None
    fields = []
    for _, text in clause:
        name, colon, value = text.partition(":")
        if not colon:
            # No ':' on this line (this also covers the empty string).
            continue
        name = name.strip()
        value = value.strip()
        fields.append((name, value))
        if name == keystr:
            found = value
    return found, fields
def split_keystr(keystr):
    """Split a key list such as "Name/Addr,Zip" into lower-cased key names.

    keystr -- keys delimited by '/' or ',' (both may be mixed).

    Returns a list of the keys in their original order.  Each key is
    lower-cased and stripped of surrounding whitespace; empty entries
    (produced by leading, trailing or doubled delimiters) are dropped.
    """
    # Treat ',' and '/' as the same delimiter.
    parts = keystr.lower().replace(",", "/").split("/")
    stripped = (part.strip() for part in parts)
    return [key for key in stripped if key]
#----------------------------------------------------------------------- | |
# Main | |
#----------------------------------------------------------------------- | |
def main():
    """Merge the clauses of several files on a key field and print them.

    Command line (sys.argv): [-d] [-s SEP] [-1 KEYS] ... [-9 KEYS]
    KEY FILE1 FILE2 [FILE3 ...].  Clauses from all files that share the
    same value for the field KEY are merged; for each field the distinct
    values are collected in first-seen order.  The result is printed to
    standard output sorted by key value and then by field name.

    Exits through Usage() on an invalid option or when fewer than three
    positional arguments are given.
    """
    # Read options.  'N:' is accepted by the option string but the value
    # of a literal -N option is never read below.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'ds:1:2:3:4:5:6:7:8:9:N:')
    except getopt.GetoptError as err:
        # Show the help text instead of a traceback on a bad option.
        Usage(str(err))
    if len(args) < 3:
        Usage()
    opts = dict(opts)
    keystr, fnames = args[0], args[1:]
    separator = opts.get('-s', DEF_SEPARATOR)
    list_separately = '-d' in opts

    # Optional per-file field filters from -1 ... -9, keyed by 0-based file
    # index.  A dict (rather than a fixed 9-slot list) means that a tenth
    # or later input file simply has no filter instead of raising
    # IndexError.
    file_fields = {}
    for n in range(1, 10):
        optkey = "-%1d" % n
        if optkey in opts:
            file_fields[n - 1] = split_keystr(opts[optkey])

    # key value -> {field name -> list of distinct values}
    merged_clauses = {}
    for filecnt, clause in read_clauses(fnames):
        # A clause holding a single empty line is only a separator.
        if len(clause) == 1 and clause[0][1] == '':
            continue
        # key_value is the value associated with the primary key.
        key_value, pairs = get_split_clause(keystr, clause)
        if not key_value:
            # Clause lacks the key field (or its value is empty): skip it.
            continue
        merged = merged_clauses.setdefault(key_value, {})
        wanted = file_fields.get(filecnt)
        for k, v in pairs:
            # Apply the -1, -2, ... filter of this file, if there is one.
            if wanted is not None and k.lower() not in wanted:
                continue
            values = merged.setdefault(k, [])
            if v not in values:
                values.append(v)

    # Print merged clauses
    for key_value in sorted(merged_clauses):
        print()
        print("%s: %s" % (keystr, key_value))
        for k, values in sorted(merged_clauses[key_value].items()):
            if k == keystr:
                # The key field was already printed as the clause header.
                continue
            if list_separately:
                for value in values:
                    print("%s: %s" % (k, value))
            else:
                print("%s: %s" % (k, separator.join(values)))
# Script entry point.  The call is unconditional (there is no __name__
# guard), so it runs whenever this file is executed or imported.
main()