72 lines
3.1 KiB
Python
72 lines
3.1 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import re
|
|
import util
|
|
import autoproc
|
|
|
|
def parse_xml_rcrs(text, summary):
    """Yield one dict per ``<Patient>`` element found in *text*.

    Each yielded dict holds the patient's ``<Item>`` values (everything
    before the first ``<Tumor>`` tag) plus a ``'tumors'`` list with one dict
    per ``<Tumor>`` element.  Every tumor dict gets a ``'meta'`` entry taken
    from the *summary* row whose ``'uid'`` equals
    ``"<first 4 chars of accessionNumberHosp>-<rest>/<sequenceNumberHospital>"``.
    The row's ``'name'`` and ``'last5'`` values are moved up to the patient
    dict instead of being kept in ``'meta'``.

    Args:
        text: XML text containing ``<Patient>...</Patient>`` elements.
        summary: Iterable of dict rows, each with at least the keys
            ``'uid'``, ``'name'`` and ``'last5'``.  The rows are not
            modified.

    Yields:
        dict: Patient items, ``'tumors'``, and (when the patient has at
        least one tumor) ``'name'`` and ``'last5'``.

    Raises:
        KeyError: If a tumor lacks ``accessionNumberHosp`` or
            ``sequenceNumberHospital``, or no summary row matches its uid.
    """
    rows_by_uid = {row['uid']: row for row in summary}
    for patient_match in re.finditer(r'<Patient>(.+?)</Patient>', text, re.DOTALL):
        patient_xml = patient_match.group(1)

        if (pos := patient_xml.find('<Tumor>')) < 0:
            # No tumors: the whole element is patient-level items.
            data = parse_xml_items(patient_xml)
            data['tumors'] = []
            yield data
            continue

        # Patient-level items come before the first <Tumor>; parsing only that
        # prefix keeps tumor items out of the patient dict.
        data = parse_xml_items(patient_xml[:pos])
        tumors = data['tumors'] = [
            parse_xml_items(tumor_match.group(1))
            for tumor_match in re.finditer(r'<Tumor>(.+?)</Tumor>', patient_xml[pos:], re.DOTALL)
        ]
        for tumor in tumors:
            acc = tumor['accessionNumberHosp']
            uid = acc[:4] + '-' + acc[4:] + '/' + tumor['sequenceNumberHospital']
            # Work on a copy: popping from the caller's row would mutate the
            # input and make a second lookup of the same uid fail on 'name'.
            meta = tumor['meta'] = dict(rows_by_uid[uid])
            data['name'] = meta.pop('name')
            data['last5'] = meta.pop('last5')
        yield data
|
|
|
|
def parse_xml_items(text):
    """Collect the ``<Item naaccrId="...">value</Item>`` pairs found in *text*.

    Returns a dict mapping each ``naaccrId`` attribute to the item's text.
    Items whose body is empty or contains ``<`` do not match the pattern and
    are skipped; when an id occurs more than once, the last occurrence wins.
    """
    items = {}
    # With two capture groups, findall yields (naaccrId, value) tuples.
    for naaccr_id, value in re.findall(r'<Item naaccrId="([^"]+)">([^<]+)</Item>', text):
        items[naaccr_id] = value
    return items
|
|
|
|
def _require(match, step):
    """Return *match* when truthy; otherwise raise AssertionError naming *step*.

    Used in place of bare ``assert`` statements so that the awaited terminal
    reads are still executed when Python runs with ``-O`` (which removes
    ``assert`` statements, including any expression inside them).
    """
    if not match:
        raise AssertionError(f'RCRS extract: {step} not seen')
    return match


async def cmd_patients(proc, alpha, omega):
    """Fetch patients from RCRS dump

    Sends ``^Create RCRS extract`` to the session *proc*, answers its set-up
    prompts, captures the document printed up to the ``\\x1a`` marker, then
    pages through the listing that follows to collect summary rows.  The
    captured document and the rows are combined by ``parse_xml_rcrs``.

    Args:
        proc: Session object; ``proc.sendline`` is both called and used as an
            async context manager, and *proc* is passed to
            ``autoproc.expect_async``.  (Project type - see ``autoproc``.)
        alpha: Start of the "Date DX" range; formatted with
            ``util.vista_strftime`` before being sent.
        omega: End of the "Date DX" range; formatted the same way.

    Yields:
        One dict per patient, as produced by ``parse_xml_rcrs``.

    Raises:
        AssertionError: If one of the awaited expectations returns a falsy
            result.
    """
    # One row of the summary listing; compiled once rather than per page.
    summary_row = re.compile(
        r'(?P<last5>[A-Z]\d{4}) (?P<name>[^\r\n]{30}) (?P<uid>[^ \r\n]+) '
        r'(?P<primarySite>[^ \r\n]+) (?P<dateOfDiagnosis>\d{2}/\d{2}/\d{4}) '
        r'(?P<dateCaseLastChanged>\d{2}/\d{2}/\d{4})'
    )

    # NOTE(review): proc.sendline as a context manager presumably serialises
    # access to the terminal - confirm in autoproc.
    async with proc.sendline, autoproc.expect_async(proc) as expect:
        proc.sendline('^Create RCRS extract')

        # Set-up dialogue.  prompt.index is compared with a position in this
        # dict, so the entry order matters (assumes index follows insertion
        # order - confirm in autoproc).  A None response is passed to
        # sendline() unchanged.
        async for prompt, response in expect.prompts_any({
            re.compile(r'^ Facility Identification Number \(FIN\): \d+// $'): None,
            ' Select date field to be used for Start/End range: ': 'Date DX',
            re.compile(r'^ Start, Date DX: : \([^\)\r\n]+\): $'): util.vista_strftime(alpha),
            re.compile(r'^ End, Date DX: : \([^\)\r\n]+\): TODAY// $'): util.vista_strftime(omega),
            ' Are these settings correct? YES// ': None,
        }, throw=True):
            proc.sendline(response)
            if prompt.index == 4:  # settings confirmed
                break

        _require(
            await expect.endswith('\r\n --------------------------------------------------------------\r\n\r\n\r\n'),
            'separator line after the settings dialogue',
        )
        proc.sendline()  # skip delay

        # Everything received before the \x1a marker is kept as the document.
        document_end = _require(await expect.endswith('\r\n\x1a'), r'\x1a end-of-document marker')
        doc_rcrs = document_end.before

        # NOTE(review): timeout_settle=31 is passed through as-is; its unit
        # and meaning are defined by autoproc.
        _require(await expect.endswith('\r\nDEVICE: ', timeout_settle=31), 'DEVICE prompt')
        proc.sendline('HOME;80')  # default HOME
        _require(await expect.earliest('HOME(CRT)\r\n'), 'HOME(CRT) device echo')

        # Page through the listing.  Prompts 0 and 1 are the ones whose
        # preceding output is scanned for summary rows; prompt 4 ends the loop.
        summary = []
        async for prompt, response in expect.prompts_any({
            '\x07': None,
            'Type <Enter> to continue or \'^\' to exit: ': None,
            ' Press \'RETURN\' to continue, \'^\' to stop: ': None,
            'Select *..Utility Options Option: ': None,
            'Select DHCP Tumor Registry Option: ': None,
        }, throw=True):
            proc.sendline(response)
            if prompt.index in (0, 1):
                summary.extend(
                    {key.strip(): value.strip() for key, value in row.groupdict().items()}
                    for row in summary_row.finditer(prompt.before)
                )
            elif prompt.index == 4:
                break

        _require(
            await expect.endswith(
                '\r\nSelect Patient Information and OE/RR Option: ',
                '\r\nSelect Patient Information and OE/RR <TEST ACCOUNT> Option: ',
            ),
            'top-level menu prompt',
        )
        expect.clear()

    # All terminal I/O is finished; parsing and yielding happen after the
    # session context has been left so a slow consumer does not keep it open.
    # ("yield from" is not available in async generators, hence the loop.)
    for item in parse_xml_rcrs(doc_rcrs, summary):
        yield item
|
|
|
|
async def cmd_tumors(proc, alpha, omega):
    """Yield every tumor from ``cmd_patients``, one at a time.

    For each patient produced by ``cmd_patients(proc, alpha, omega)`` the
    ``'tumors'`` list is removed from the patient dict, and each tumor dict
    is yielded with a ``'patient'`` key pointing at that (now tumor-less)
    patient dict.  Patients without tumors yield nothing.
    """
    async for person in cmd_patients(proc, alpha, omega):
        # pop() detaches the list so the patient dict does not refer back to
        # the tumors that reference it.
        for tumor in person.pop('tumors'):
            tumor['patient'] = person
            yield tumor
|