#!/usr/bin/env python3
# NOTE(review): several string/regex literals in this module look as if
# angle-bracketed text was stripped from them before this revision was made:
#   * the patterns r'(.+?)' and the '' passed to str.find() in parse_xml_rcrs,
#   * the single-group pattern in parse_xml_items (which then reads group(2)),
#   * the '(?P[' groups in the summary-row pattern in cmd_patients (invalid
#     regex syntax; 'uid', 'name' and 'last5' keys are required downstream),
#   * the two identical 'Select Patient Information and OE/RR Option: ' entries.
# They are reproduced unchanged here; restore them from version control.

import re

import util
import autoproc


def _require(result, what):
    """Return *result* if it is truthy, otherwise raise AssertionError.

    Used instead of ``assert await ...`` so the awaited call (and the value it
    produces) is not discarded when Python runs with ``-O``.
    """
    if not result:
        raise AssertionError(f'terminal output did not match: {what}')
    return result


def parse_xml_rcrs(text, summary):
    """Yield one dict per patient section found in *text*.

    Each dict holds the patient's items plus a ``'tumors'`` list.  Every tumor
    gets a ``'meta'`` entry taken from the *summary* row whose ``'uid'``
    equals ``<accession[:4]>-<accession[4:]>/<sequenceNumberHospital>``.

    The ``'name'`` and ``'last5'`` keys are popped out of that summary row
    (mutating the caller's row dicts) and stored on the patient dict.

    Raises KeyError if a tumor has no matching summary row or lacks the
    accession/sequence items.
    """
    by_uid = {row['uid']: row for row in summary}
    # NOTE(review): pattern and find() argument appear truncated, see header.
    for patient_match in re.finditer(r'(.+?)', text, re.DOTALL):
        patient = patient_match.group(1)
        if (pos := patient.find('')) >= 0:
            # Items before the marker belong to the patient, the rest to tumors.
            data = parse_xml_items(patient[:pos])
            data['tumors'] = [
                parse_xml_items(tumor.group(1))
                for tumor in re.finditer(r'(.+?)', patient[pos:], re.DOTALL)
            ]
            for tumor in data['tumors']:
                acc = tumor['accessionNumberHosp']
                uid = f"{acc[:4]}-{acc[4:]}/{tumor['sequenceNumberHospital']}"
                meta = tumor['meta'] = by_uid[uid]
                data['name'] = meta.pop('name')
                data['last5'] = meta.pop('last5')
        else:
            data = parse_xml_items(patient)
            data['tumors'] = []
        yield data


def parse_xml_items(text):
    """Return a dict mapping group 1 to group 2 of every item match in *text*."""
    # NOTE(review): the pattern has one group but group(2) is read; it appears
    # truncated, see header.
    return {
        item.group(1): item.group(2)
        for item in re.finditer(r'([^<]+)', text)
    }


async def cmd_patients(proc, alpha, omega):
    """Fetch patients from RCRS dump"""
    # Drives the '^Create RCRS extract' dialogue on *proc*, using *alpha* and
    # *omega* (formatted with util.vista_strftime) as the start/end answers,
    # captures the extract text and the summary listing, then yields the
    # records produced by parse_xml_rcrs.
    async with proc.sendline, autoproc.expect_async(proc) as expect:
        proc.sendline('^Create RCRS extract')
        # NOTE(review): the last key was split across a line break in the
        # source this was rebuilt from; confirm the exact prompt text.
        async for prompt, response in expect.prompts_any({
            re.compile(r'^ Facility Identification Number \(FIN\): \d+// $'): None,
            ' Select date field to be used for Start/End range: ': 'Date DX',
            re.compile(r'^ Start, Date DX: : \([^\)\r\n]+\): $'): util.vista_strftime(alpha),
            re.compile(r'^ End, Date DX: : \([^\)\r\n]+\): TODAY// $'): util.vista_strftime(omega),
            ' Are these settings correct? YES// ': None,
        }, throw=True):
            proc.sendline(response)
            if prompt.index == 4:  # settings confirmed
                break

        _require(
            await expect.endswith('\r\n --------------------------------------------------------------\r\n\r\n\r\n'),
            'separator line',
        )
        proc.sendline()  # skip delay
        m = _require(await expect.endswith('\r\n\x1a'), r'\x1a terminator')
        doc_rcrs = m.before
        _require(
            await expect.endswith('\r\nDEVICE: ', timeout_settle=31),
            'DEVICE prompt',
        )
        proc.sendline('HOME;80')  # default HOME
        _require(await expect.earliest('HOME(CRT)\r\n'), 'HOME(CRT) echo')

        summary = []
        async for prompt, response in expect.prompts_any({
            '\x07': None,
            'Type to continue or \'^\' to exit: ': None,
            ' Press \'RETURN\' to continue, \'^\' to stop: ': None,
            'Select *..Utility Options Option: ': None,
            'Select DHCP Tumor Registry Option: ': None,
        }, throw=True):
            proc.sendline(response)
            if prompt.index in (0, 1):
                # Harvest the summary rows printed before this prompt.
                # NOTE(review): '(?P[' is not valid regex syntax; the group
                # names appear to have been stripped, see header.
                summary.extend(
                    {k.strip(): v.strip() for k, v in row.groupdict().items()}
                    for row in re.finditer(
                        r'(?P[A-Z]\d{4}) (?P[^\r\n]{30}) (?P[^ \r\n]+) (?P[^ \r\n]+) (?P\d{2}/\d{2}/\d{4}) (?P\d{2}/\d{2}/\d{4})',
                        prompt.before,
                    )
                )
            elif prompt.index == 4:
                break

        async for prompt, response in expect.promptmatches((
            (re.compile(r' Press \'RETURN\' to continue, \'\^\' to stop: $'), None),
            ('Select Patient Information and OE/RR Option: ', None, True),
            ('Select Patient Information and OE/RR Option: ', None, True),
        ), throw=True):
            if prompt.index == 0:
                proc.sendline(response)
        expect.clear()

    # Parsing needs only the captured text, so it runs after the block above.
    # NOTE(review): original nesting of this loop was not recoverable; confirm.
    for item in parse_xml_rcrs(doc_rcrs, summary):
        yield item


# Flattens cmd_patients: yields each tumor dict with its owning patient dict
# (minus the 'tumors' list) attached under the 'patient' key.
async def cmd_tumors(proc, alpha, omega):
    async for patient in cmd_patients(proc, alpha, omega):
        tumors = patient.pop('tumors')
        for item in tumors:
            item['patient'] = patient
            yield item