
78 lines
3.3 KiB
Raw Normal View History

2024-03-02 00:34:29 -05:00
#!/usr/bin/env python3
import re
import util
import autoproc
def parse_xml_rcrs(text, summary):
    """Yield one dict per ``<Patient>`` element found in an RCRS XML extract.

    Args:
        text: The XML document as a string.
        summary: Iterable of row dicts. Each row must have a ``'uid'`` key;
            rows that get matched to a tumor must also have ``'name'`` and
            ``'last5'`` keys.

    Yields:
        A dict of the patient's ``<Item>`` values (see ``parse_xml_items``)
        plus a ``'tumors'`` list. When the patient has tumors, each tumor
        dict gains a ``'meta'`` entry (its summary row) and the patient dict
        gains ``'name'`` and ``'last5'`` taken from that row.

    Raises:
        KeyError: If a tumor lacks ``accessionNumberHosp`` or
            ``sequenceNumberHospital``, if no summary row matches the
            tumor's uid, or if the matched row lacks ``name``/``last5``.

    Note:
        ``name`` and ``last5`` are popped from the summary rows, so the row
        dicts passed in by the caller are mutated.
    """
    # Index the summary rows by uid for direct lookup per tumor.
    summary = {row['uid']: row for row in summary}
    for patient in re.finditer(r'<Patient>(.+?)</Patient>', text, re.DOTALL):
        patient = patient.group(1)
        if (pos := patient.find('<Tumor>')) >= 0:
            # Items located before the first <Tumor> describe the patient.
            data = parse_xml_items(patient[:pos])
            data['tumors'] = [
                parse_xml_items(tumor.group(1))
                for tumor in re.finditer(r'<Tumor>(.+?)</Tumor>', patient[pos:], re.DOTALL)
            ]
            for tumor in data['tumors']:
                acc = tumor['accessionNumberHosp']
                # Summary uid is built as "<first 4 chars>-<rest>/<sequence number>".
                meta = tumor['meta'] = summary[acc[:4] + '-' + acc[4:] + '/' + tumor['sequenceNumberHospital']]
                data['name'] = meta.pop('name')
                data['last5'] = meta.pop('last5')
        else:
            # No tumors: every item in the element belongs to the patient.
            data = parse_xml_items(patient)
            data['tumors'] = []
        yield data
def parse_xml_items(text):
    """Collect ``<Item naaccrId="...">value</Item>`` elements into a dict.

    Args:
        text: XML fragment to scan.

    Returns:
        A dict mapping each ``naaccrId`` attribute to the element's text.
        Items whose text is empty or contains ``<`` do not match the pattern
        and are skipped. If an id occurs more than once, the last occurrence
        wins.
    """
    return {
        item.group(1): item.group(2)
        for item in re.finditer(r'<Item naaccrId="([^"]+)">([^<]+)</Item>', text)
    }
async def cmd_patients(proc, alpha, omega):
"""Fetch patients from RCRS dump"""
# NOTE(review): the nesting of this block is not visible here (every line sits
# at column 0) and the bare date-stamp lines below look like blame-view
# residue rather than source. Some branch bodies also appear to be missing.
# Confirm against the real file before relying on the comments below.
#
# alpha and omega are passed through util.vista_strftime and sent as the
# replies to the "Start, Date DX" and "End, Date DX" prompts respectively.
async with proc.sendline, autoproc.expect_async(proc) as expect:
# Jump to the extract option by name.
proc.sendline('^Create RCRS extract')
# Prompt table for the extract dialogue: keys are literal strings or compiled
# patterns, values are the replies. Presumably a None reply accepts the
# default shown in the prompt -- confirm against autoproc.
async for prompt, response in expect.prompts_any({
re.compile(r'^ Facility Identification Number \(FIN\): \d+// $'): None,
' Select date field to be used for Start/End range: ': 'Date DX',
re.compile(r'^ Start, Date DX: : \([^\)\r\n]+\): $'): util.vista_strftime(alpha),
re.compile(r'^ End, Date DX: : \([^\)\r\n]+\): TODAY// $'): util.vista_strftime(omega),
' Are these settings correct? YES// ': None,
}, throw=True):
# Index 4 is the fifth entry above: the "Are these settings correct?" prompt.
if prompt.index == 4:
# Wait for the dashed separator line followed by blank lines.
assert await expect.endswith('\r\n --------------------------------------------------------------\r\n\r\n\r\n')
proc.sendline() # skip delay
# Everything received before the '\r\n\x1a' marker is kept as the XML
# document that is later handed to parse_xml_rcrs.
assert (m := await expect.endswith('\r\n\x1a'))
doc_rcrs = m.before
# NOTE(review): timeout_settle semantics are defined by autoproc -- confirm.
assert await expect.endswith('\r\nDEVICE: ', timeout_settle=31)
proc.sendline('HOME;80') # default HOME
assert await expect.earliest('HOME(CRT)\r\n')
# Rows scraped from the report output that follows the DEVICE selection.
summary = []
async for prompt, response in expect.prompts_any({
'\x07': None,
'Type <Enter> to continue or \'^\' to exit: ': None,
' Press \'RETURN\' to continue, \'^\' to stop: ': None,
'Select *..Utility Options Option: ': None,
'Select DHCP Tumor Registry Option: ': None,
}, throw=True):
# Indexes 0 and 1 are the '\x07' entry and the "Type <Enter> to continue"
# prompt; the text received before either is scanned for report rows.
if prompt.index == 0 or prompt.index == 1:
# Each matching row becomes a dict of its named groups with keys and values
# stripped of surrounding whitespace.
summary.extend({k.strip(): v.strip() for k, v in row.groupdict().items()} for row in re.finditer(r'(?P<last5>[A-Z]\d{4}) (?P<name>[^\r\n]{30}) (?P<uid>[^ \r\n]+) (?P<primarySite>[^ \r\n]+) (?P<dateOfDiagnosis>\d{2}/\d{2}/\d{4}) (?P<dateCaseLastChanged>\d{2}/\d{2}/\d{4})', prompt.before))
# Index 4 is the 'Select DHCP Tumor Registry Option: ' prompt.
# NOTE(review): no statement for this branch is visible here.
elif prompt.index == 4:
2025-01-14 21:56:51 -05:00
# Second prompt table, given as (pattern, reply, flag) tuples; the meaning
# of the third element is defined by autoproc -- confirm.
async for prompt, response in expect.promptmatches((
(re.compile(r' Press \'RETURN\' to continue, \'\^\' to stop: $'), None),
('Select Patient Information and OE/RR Option: ', None, True),
('Select Patient Information and OE/RR <TEST ACCOUNT> Option: ', None, True),
), throw=True):
# NOTE(review): no statement for this branch is visible here.
if prompt.index == 0:
2024-03-02 00:34:29 -05:00
# Combine the captured XML document with the scraped summary rows and
# yield one item per parsed patient.
for item in parse_xml_rcrs(doc_rcrs, summary):
yield item
async def cmd_tumors(proc, alpha, omega):
    """Yield every tumor dict from the patients produced by ``cmd_patients``.

    The ``'tumors'`` list is removed from each patient dict, and that patient
    dict is attached to each of its tumors under the ``'patient'`` key.
    Arguments are forwarded unchanged to ``cmd_patients``.
    """
    async for record in cmd_patients(proc, alpha, omega):
        for tumor in record.pop('tumors'):
            tumor['patient'] = record
            yield tumor