sam.ds.chen 发表于 2013-2-7 23:48:58

Programmer VS DBA

 
...
 
 
/**
 * Lab data validation script (Groovy).
 *
 * Picks the two most recently modified *.txt files under ../data, parses every
 * line after the first as a pipe-delimited record, and writes an HTML report to
 * ../output/report.htm with four sections:
 *   1. unit validation (empty primary-key fields, fields failing REGEXP),
 *   2. lines duplicated within a single file,
 *   3. lines that share primary keys across the data but differ in content,
 *   4. lines present in file1 but absent from file2 (compared by primary keys).
 *
 * NOTE(review): this listing was recovered from a forum post whose BBCode filter
 * had stripped every "[...]" group beginning with a letter or digit (array/map
 * subscripts and regex character classes). Subscripts were restored from the
 * surrounding data flow; restored regex classes are marked below - confirm them
 * against the original script before relying on the validation results.
 */

/**
 * One pipe-delimited record of a lab data file.
 *
 * Identity (hashCode/equals) is defined by the primary-key columns only, so two
 * records with the same keys but different results compare equal.
 */
class LabDataLine {
    // Column names, in the order the fields appear on each line.
    static col_names = ['LABDT','LABTM','REQUISITIONNO','SUBJECT','SUBJECTINITIALS','SEX','SUBJECTDOB','SPONSOR','PROTOCOL','PHYSICIANNM','SITENO','VISIT','VISITNO','GROUPPNL','TESTNM','RANGEN','RESULTTXT','RESULTREAL','RANGELOW','RANGEHI','FLAG','UNIT','FOOTNOTE','LOCCODE','LOCADDR']
    // pks: columns forming the primary key; cols: column name -> field index.
    static pks = ['PHYSICIANNM','SITENO','SUBJECT','VISIT','VISITNO','LABDT','LABTM','GROUPPNL','TESTNM'], cols = [:]
    static {
        col_names.eachWithIndex { o, i ->
            cols[o] = i
        }
    }
    // Per-column patterns applied with the find operator (=~), so a pattern
    // without ^/$ anchors accepts a match anywhere in the field.
    // NOTE(review): the digit classes in VISITNO, LABDT and LABTM were stripped
    // in the recovered listing and are reconstructed here (LABDT follows the
    // common yyyyMMdd-with-leap-year pattern; LABTM is assumed to be HHmmss).
    static REGEXP = [
        'SUBJECT'   : /00\d{1}\-\d{4}/,
        'VISITNO'   : /^NA$|^[1-9]{1}\d*/,
        'LABDT'     : /(([0-9]{3}[1-9]|[0-9]{2}[1-9][0-9]{1}|[0-9]{1}[1-9][0-9]{2}|[1-9][0-9]{3})(((0[13578]|1[02])(0[1-9]|[12][0-9]|3[01]))|((0[469]|11)(0[1-9]|[12][0-9]|30))|(02(0[1-9]|[1][0-9]|2[0-8]))))|((([0-9]{2})(0[48]|[2468][048]|[13579][26])|((0[48]|[2468][048]|[3579][26])00))0229)/,
        'LABTM'     : /^$|((0[0-9])|(1[0-9])|(2[0-3]))([0-5][0-9])([0-5][0-9])/,
        'RESULTREAL': /^$|^[-+]?\d+(\.\d+)?$/,
        'RANGELOW'  : /^$|^[-+]?\d+(\.\d+)?$/,
        'RANGEHI'   : /^$|^[-+]?\d+(\.\d+)?$/
    ]

    // location: "<line number>@file<1|2>"; line: raw text; arr: split fields.
    def location, line, arr

    /**
     * @param m map with keys 'location' and 'line' (the raw record text)
     */
    public LabDataLine(m) {
        location = m.location
        line = m.line
        // Limit -1 keeps trailing empty fields; the default split drops them,
        // which made arr shorter than col_names when the last columns are empty.
        arr = this.line.split('\\|', -1)
    }

    /** @return the column names rendered as a run of <th> cells */
    public static String genHd() {
        "<th>${col_names.join('</th><th>')}</th>"
    }

    /** @return this record rendered as a run of <td> cells, one per column */
    public String toString() {
        def s = []
        col_names.each {
            def data = arr[cols[it]]
            // NOTE(review): the blank-cell filler was lost in the recovered
            // listing (shown as whitespace split across a line break);
            // '&nbsp;' is assumed.
            s << (data == '' ? '&nbsp;' : data)
        }
        "<td>${s.join('</td><td>')}</td>"
    }

    /**
     * Same as toString(), but wraps the values of the given columns in <b>.
     *
     * @param highlightedProps collection of column names to embolden
     */
    public String toString(highlightedProps) {
        def s = []
        col_names.each {
            def data = arr[cols[it]]
            def hl = highlightedProps.contains(it)
            s << (data == '' ? '&nbsp;' : "${hl ? '<b>' : ''}$data${hl ? '</b>' : ''}")
        }
        "<td>${s.join('</td><td>')}</td>"
    }

    /** Hash of the primary-key field values joined with '|'. */
    public int hashCode() {
        def s = []
        pks.each { s << arr[cols[it]] }
        s.join('|').hashCode()
    }

    /** Equality is decided by hashCode() alone, i.e. by primary keys (hash collisions are not disambiguated). */
    public boolean equals(Object o) {
        this.hashCode() == o.hashCode()
    }
}

def startAt = new Date().time
def files = new File('../data').listFiles().findAll { it.name =~ /(?i)\.txt$/ }
if (files.size() < 2) {
    // Previously surfaced as an opaque index error on files[-2].
    throw new IllegalStateException("Expected at least two .txt files in ../data, found ${files.size()}")
}
files.sort { f1, f2 -> f1.lastModified() <=> f2.lastModified() }
// The two newest files, ordered by name: dataFiles[0] is "file1", dataFiles[1] is "file2".
def dataFiles = [files[-1], files[-2]].sort { f1, f2 -> f1.name <=> f2.name }
// dataset: location -> LabDataLine (insertion-ordered).
// h2dl[0] / h2dl[1]: raw-line hash -> locations, for file1 / file2.
// h2dl[-1]: primary-key hash -> (raw-line hash -> records).
def dataset = [:], h2dl = [[:], [:], [:]]

// preparation
(-2..-1).each { i ->
    // Negative index selects dataFiles[0] for i == -2 and dataFiles[1] for i == -1.
    dataFiles[i].eachLine { l, n ->
        // eachLine numbers lines from 1; line 1 is skipped (presumably a header row).
        if (n > 1) {
            def location = "$n@file${i + 3}"
            def dl = new LabDataLine(location: location, line: l)
            dataset[location] = dl
            def hc = l.hashCode()
            h2dl[i + 2][hc] = h2dl[i + 2][hc] ?: []
            h2dl[i + 2][hc] << location
            def hc2 = dl.hashCode()
            h2dl[-1][hc2] = h2dl[-1][hc2] ?: [:]
            h2dl[-1][hc2][hc] = h2dl[-1][hc2][hc] ?: []
            h2dl[-1][hc2][hc] << dl
        }
    }
}

def report = []
report << "<html><head><title>Lab Data Validation Report</title><style type=\"text/css\">table, tr, td {border-width: 1px;border-spacing: 2px;border-style: outset;border-color: blue;border-collapse: separate;}th{background-color:red;}.even-row{background-color:#CECEF6;}.odd-row{background-color:#A9D0F5;}</style></head><body>file1: ${dataFiles[0].name}<br>file2: ${dataFiles[1].name}<hr>"

//#1. unit validation
report << "---unit validation---<br>"
report << "<table><tr><th>Location</th><th>Field</th><th>Value</th></tr>"
def j = 0
dataset.each { dlkv ->
    def dl = dlkv.value
    // Primary-key fields must not be blank.
    dl.pks.each { pk ->
        if (dl.arr[dl.cols[pk]].trim() == '') {
            report << "<tr class=\"${j++ % 2 == 0 ? 'even-row' : 'odd-row'}\"><td>${dl.location}</td><td>$pk</td><td>NULL</td></tr>"
        }
    }
    // Fields with a registered pattern must match it.
    dl.REGEXP.each { kv ->
        if (!(dl.arr[dl.cols[kv.key]] =~ kv.value)) {
            // Row is now closed with </tr>; the original emitted a second <tr>.
            report << "<tr class=\"${j++ % 2 == 0 ? 'even-row' : 'odd-row'}\"><td>${dl.location}</td><td>${kv.key}</td><td>${dl.arr[dl.cols[kv.key]]}</td></tr>"
        }
    }
}
report << "</table><hr>"

//#2. integration validation :: duplicated lines in each file
report << "---duplicated lines in each file---<br>"
report << "<table><tr><th>Location</th>${LabDataLine.genHd()}</tr>"
(0..1).each { i ->
    h2dl[i].each { kv ->
        if (kv.value.size() > 1) {
            kv.value.each { e ->
                // Rows are appended one by one; appending a nested list made
                // join() render it as "[row, row]" instead of separate rows.
                report << "<tr><td>$e</td>${dataset[e]}</tr>"
            }
        }
    }
}
report << "</table><hr>"

//#3. integration validation :: unequal lines with equal PKs in the two files
report << "---unequal lines with equal PKs in the two files---<br>"
report << "<table><tr><th>Location</th>${LabDataLine.genHd()}</tr>"
def seq1 = 1, colors = ['Aqua', 'Orange'], ci = 0
h2dl[-1].each {
    // More than one distinct raw line under the same primary-key hash.
    if (it.value.size() > 1) {
        def seq2 = 1
        it.value.each { dl ->
            // All records under one raw-line hash share the same text; use the first.
            def d = dl.value[0]
            def pkgrp = it.value
            def highlightedProps = []
            // Highlight columns whose value no other variant in the group shares.
            d.col_names.each { col ->
                if (pkgrp.findAll { kv -> kv.value[0].arr[d.cols[col]] == d.arr[d.cols[col]] }.size() == 1) {
                    highlightedProps << col
                }
            }
            report << "<tr style=\"background:${colors[ci]};\"><td>${seq1}.${seq2}<br>${dl.value.location.join('<br>')}</td>${dl.value[0].toString(highlightedProps)}</tr>"
            seq2++
        }
        // Alternate the background colour between consecutive groups.
        ci = seq1++ % 2
    }
}
report << "</table><hr>"

//#4. integration validation :: lines in file1 but not in file2(by PKs)
report << "---lines in file1 but not in file2(by PKs)---<br>"
report << "<table><tr><th>Location</th>${LabDataLine.genHd()}</tr>"
// NOTE(review): the initialiser and subscripts of hss were stripped in the
// recovered listing; two insertion-ordered sets (file1, file2) are assumed,
// with membership decided by LabDataLine.equals/hashCode (primary keys).
def hss = [new LinkedHashSet(), new LinkedHashSet()]
dataset.each {
    hss[it.key.endsWith('@file1') ? 0 : 1] << it.value
}
hss[0].minus(hss[1]).eachWithIndex { o, i ->
    report << "<tr class=\"${i % 2 == 0 ? 'even-row' : 'odd-row'}\"><td>${o.location}</td>${o}</tr>"
}
report << "</table><hr>"

def msg = "Validation script by S.C. Global Research Services, LLC.<br>Time elapsed: ${new Date().time - startAt} ms"
report << "<p>$msg</p>"
new File("../output/report.htm").write(report.join('\n'))
...
 
页: [1]
查看完整版本: Programmer VS DBA