Programmer VS DBA
...
/*
 * Lab data validation: the LabDataLine record class followed by a top-level Groovy script
 * that builds an HTML report and writes it to ../output/report.htm.
 *
 * NOTE(review): this listing appears to be damaged. Line breaks are missing, and the contents
 * of most bracketed subscripts and regex character classes seem to have been stripped
 * (see "arr]", "cols=i", "def dataFiles = , files[-2]]" and the REGEXP patterns). As shown it
 * will not compile, and every inline "//" comment swallows the code that follows it on the
 * same physical line. Restore the text from the original script before running it; do not
 * guess the missing subscripts or patterns.
 *
 * LabDataLine
 *   col_names  - ordered list of the column names of one record.
 *   pks        - the column names that are used together as the record key.
 *   cols       - map filled in the static initializer while iterating col_names with their
 *                index (the assignment itself is damaged; presumably name -> index, confirm).
 *   REGEXP     - per-column patterns checked by the "unit validation" section of the script.
 *   LabDataLine(m)              - reads m.location and m.line, then splits the line on the
 *                                 literal '|' character into arr.
 *   genHd()                     - returns the column names wrapped in <th> cells.
 *   toString()                  - returns the record wrapped in <td> cells; an empty value is
 *                                 rendered as &nbsp;.
 *   toString(highlightedProps)  - same as toString(), but a non-empty value is wrapped in <b>
 *                                 when highlightedProps contains its column name.
 *   hashCode()                  - hash of the key column values joined with '|'.
 *   equals(o)                   - compares the two hash codes only.
 *                                 NOTE(review): two different keys with colliding hashes would
 *                                 compare equal, and a null argument would throw - confirm
 *                                 that this is acceptable.
 *
 * Script
 *   - Records the start time, lists the files in ../data whose name matches (?i)\.txt$ and
 *     sorts them by lastModified. The expression that picks dataFiles is damaged; files[-2]
 *     is one of the picked files (presumably the two most recent files are used - confirm).
 *     dataFiles is then sorted by name.
 *   - Preparation: for each data file, every line with line number n > 1 becomes a
 *     LabDataLine with location "<n>@file<i+3>"; the script also computes the hash of the raw
 *     line and the key hash of the record and stores entries in dataset and h2dl (the exact
 *     subscripts are damaged).
 *   - Section 1, "unit validation": emits a row with value NULL for a key column whose
 *     trimmed value is empty, and a row for each column whose value does not match its
 *     REGEXP pattern.
 *     NOTE(review): the row for a failed pattern ends with "<tr>" instead of "</tr>" - this
 *     looks like a typo in the emitted HTML; confirm before changing it.
 *   - Section 2: "duplicated lines in each file".
 *   - Section 3: "unequal lines with equal PKs in the two files"; columns collected in
 *     highlightedProps are passed to toString(highlightedProps), and the row background
 *     is taken from the colors list.
 *   - Section 4: "lines in file1 but not in file2(by PKs)".
 *   - Appends a footer with the elapsed time in milliseconds and writes the report lines,
 *     joined with newlines, to ../output/report.htm.
 */
class LabDataLine { static col_names = ['LABDT','LABTM','REQUISITIONNO','SUBJECT','SUBJECTINITIALS','SEX','SUBJECTDOB','SPONSOR','PROTOCOL','PHYSICIANNM','SITENO','VISIT','VISITNO','GROUPPNL','TESTNM','RANGEN','RESULTTXT','RESULTREAL','RANGELOW','RANGEHI','FLAG','UNIT','FOOTNOTE','LOCCODE','LOCADDR'] static pks = ['PHYSICIANNM','SITENO','SUBJECT','VISIT','VISITNO','LABDT','LABTM','GROUPPNL','TESTNM'], cols = [:] static{col_names.eachWithIndex{o,i-> cols=i }} static REGEXP = ['SUBJECT':/00\d{1}\-\d{4}/,'VISITNO':/^NA$|^{1}\d*/,'LABDT':/(({3}|{2}{1}|{1}{2}|{3})(((0|1)(0||3))|((0|11)(0||30))|(02(0||2))))|((({2})(0||)|((0||)00))0229)/,'LABTM':/^$|((0)|(1)|(2))()()/,'RESULTREAL':/^$|^[-+]?\d+(\.\d+)?$/,'RANGELOW':/^$|^[-+]?\d+(\.\d+)?$/,'RANGEHI':/^$|^[-+]?\d+(\.\d+)?$/] def location, line, arr public LabDataLine(m){ location = m.location line = m.line arr = this.line.split('\\|') } public static String genHd() { def s = [] col_names.each{s << it} "<th>${s.join('</th><th>')}</th>" } public String toString(){ def s = [] col_names.each{ def data = arr] s << (data==''?'&nbsp;':data) } "<td>${s.join('</td><td>')}</td>" } public String toString(highlightedProps){ def s = [] col_names.each{ def data = arr] def hl = highlightedProps.contains(it) s << (data==''?'&nbsp;':"${hl?'<b>':''}$data${hl?'</b>':''}") } "<td>${s.join('</td><td>')}</td>" } public int hashCode(){ def s = [] pks.each{s << arr]} s.join('|').hashCode() } public boolean equals(Object o){ this.hashCode() == o.hashCode() }}def startAt = new Date().timedef files = new File('../data').listFiles().findAll{it.name=~/(?i)\.txt$/}files.sort{f1,f2->f1.lastModified()<=>f2.lastModified()}def dataFiles = , files[-2]].sort{f1,f2->f1.name<=>f2.name}def dataset = [:], h2dl = [[:],[:],[:]]// preparation(-2..-1).each{i-> dataFiles.eachLine{l,n-> if(n>1){ def location = "$n@file${i+3}" def dl = new LabDataLine(location:location,line:l) dataset = dl def hc = l.hashCode() h2dl = h2dl?:[] h2dl << location def hc2 = dl.hashCode() 
h2dl[-1] = h2dl[-1]?:[:] h2dl[-1] = h2dl[-1] ?: [] h2dl[-1] << dl } }}def report = []report << "<html><head><title>Lab Data Validation Report</title><style type=\"text/css\">table, tr, td {border-width: 1px;border-spacing: 2px;border-style: outset;border-color: blue;border-collapse: separate;}th{background-color:red;}.even-row{background-color:#CECEF6;}.odd-row{background-color:#A9D0F5;}</style></head><body>file1: ${dataFiles.name}<br>file2: ${dataFiles.name}<hr>"//#1. unit validationreport << "---unit validation---<br>"report << "<table><tr><th>Location</th><th>Field</th><th>Value</th></tr>"def j=0dataset.each{dlkv-> def dl = dlkv.value dl.pks.each{pk-> if(dl.arr].trim() == ''){ report << "<tr class=\"${j++%2==0?'even-row':'odd-row'}\"><td>${dl.location}</td><td>$pk</td><td>NULL</td></tr>" } } dl.REGEXP.each{kv-> if(!(dl.arr] =~ kv.value)){ report << "<tr class=\"${j++%2==0?'even-row':'odd-row'}\"><td>${dl.location}</td><td>${kv.key}</td><td>${dl.arr]}</td><tr>" } }}report << "</table><hr>"//#2. integration validation :: duplicated lines in each filereport << "---duplicated lines in each file---<br>"report << "<table><tr><th>Location</th>${LabDataLine.genHd()}</tr>"(0..1).each{i-> h2dl.each{kv-> if(kv.value.size()>1){ def tmp = [] kv.value.each{e-> tmp << "<tr><td>$e</td>${dataset}</tr>" } report << tmp } }}report << "</table><hr>"//#3. 
integration validation :: unequal lines with equal PKs in the two filesreport << "---unequal lines with equal PKs in the two files---<br>"report << "<table><tr><th>Location</th>${LabDataLine.genHd()}</tr>"def seq1 = 1, colors = ['Aqua', 'Orange'], ci = 0h2dl[-1].each{ if(it.value.size()>1){ def seq2 = 1 it.value.each{dl-> def d = dl.value def pkgrp = it.value def highlightedProps = [] d.col_names.each{col-> if(pkgrp.findAll{kv->kv.value.arr]==d.arr]}.size()==1){ highlightedProps << col } } report << "<tr style=\"background:${colors};\"><td>${seq1}.${seq2}<br>${dl.value.location.join('<br>')}</td>${dl.value.toString(highlightedProps)}</tr>" seq2++ } ci = seq1++%2 }}report << "</table><hr>"//#4. integration validation :: lines in file1 but not in file2(by PKs)report << "---lines in file1 but not in file2(by PKs)---<br>"report << "<table><tr><th>Location</th>${LabDataLine.genHd()}</tr>"def hss = dataset.each{ hss << it.value}hss.minus(hss).eachWithIndex{o,i-> report << "<tr class=\"${i%2==0?'even-row':'odd-row'}\"><td>${o.location}</td>${o}</tr>"}report << "</table><hr>"def msg = "Validation script by S.C. Global Research Services, LLC.<br>Time elapsed: ${new Date().time - startAt} ms"report << "<p>$msg</p>"new File("../output/report.htm").write(report.join('\n'))
...
页:
[1]