mcmas.parser
"""
Regex-based helpers for parsing ISPL specification text and state dumps.

``parser`` (alias ``parse``) turns specification text into an ``ISPL``
object from ``mcmas.ispl``; the ``extract_*`` functions do the text slicing.
"""

import re
from typing import Optional

from mcmas import util

LOGGER = util.get_logger(__name__)


def extract_witnesses(text: str, fname: str = ""):  # -> typing.List[dict]
    """
    Parse agent state text into a list of dictionaries, one per state.

    The text is split on the literal marker ``-- State ``.  Inside each
    chunk an ``Agent <name>`` line opens a namespace, and every following
    indented ``key=value`` line is stored as ``"<name>.<key>": "<value>"``.
    Captures ALL key=value pairs, not just 'state'.

    Args:
        text (str): Input text with Agent sections and indented key=value pairs
        fname (str): Label that is only used in the log message

    Returns:
        list: List of dictionaries with all namespaced properties.  Chunks
        that yield no pairs are omitted; values are kept as strings.
    """
    # The header is accepted at any indentation: such a line contains a space
    # and no "=", so it can never be mistaken for a key=value line.
    agent_re = re.compile(r"^\s*Agent\s+(\w+)$")
    kv_re = re.compile(r"^\s+(\w+)=(\w+)$")

    states = [x.strip() for x in text.strip().split("-- State ") if x.strip()]
    LOGGER.info(f"decoded {len(states)} states from {fname}")
    output = []
    for state in states:
        props = {}
        current_namespace = None
        for line in state.split("\n"):
            # Skip blank lines and "--" comment/separator lines.
            if not line.strip() or line.lstrip().startswith("--"):
                continue
            agent_match = agent_re.match(line)
            if agent_match:
                current_namespace = agent_match.group(1)
                continue
            kv_match = kv_re.match(line)
            # Pairs that appear before any Agent header are ignored.
            if kv_match and current_namespace:
                # A repeated key overrides the earlier value.
                props[f"{current_namespace}.{kv_match.group(1)}"] = kv_match.group(2)
        if props:
            output.append(props)
    return output


def extract_block(
    text: str,
    pattern=None,
    block_start: str = "^Agent",
    block_end: str = r"end\s+Agent\b",
    name=None,
    value_only: bool = False,
) -> dict:
    """
    Extract regex-delimited blocks from ``text``.

    Args:
        text: Text to search.
        pattern: Regex source string.  When falsy, a pattern is built from
            ``block_start`` and ``block_end`` whose group 1 is the word after
            ``block_start`` and whose group 2 is the block body.
        block_start: Regex that opens a block (default pattern only).
        block_end: Regex that closes a block (default pattern only).
        name: Key under which captured text is stored.  When ``None`` every
            match is stored under its own group 1.
        value_only: Return only the text of the last match instead of a dict.

    Returns:
        A dict mapping keys to captured text, or -- when ``value_only`` is
        true -- the captured text of the last match (``""`` if no match).
        With an explicit ``name`` the captured text is group 1; otherwise it
        is group 2 (group 1 if the pattern defines a single group only).
    """
    if not pattern:
        pattern = block_start + r"\s+(\w+)\s*\n(.*?)^\s*" + block_end
    pattern = re.compile(pattern, re.MULTILINE | re.DOTALL)
    # Decide once, before the loop, so that every match gets its own key and
    # the body group is not confused with the name group.
    keyed_by_match = name is None
    body_group = 2 if keyed_by_match and pattern.groups >= 2 else 1
    out = {}
    block = ""
    for match in pattern.finditer(text):
        key = match.group(1) if keyed_by_match else name
        block = match.group(body_group)
        out[key] = block
    return block if value_only else out


def normalize(x):
    """
    Collapse every run of consecutive spaces in ``x`` into a single space.
    """
    return re.sub(r" +", " ", x)


def extract_toplevel(txt: str, section="Formulae") -> Optional[list]:
    """
    Extract the entries of a top-level ``<section> ... end <section>`` block.

    Lines whose first non-blank characters are ``--`` are dropped before
    matching.  The body of the first matching block is split on ``;``; each
    non-blank piece is stripped, has its space runs collapsed and its
    newlines removed.

    Args:
        txt: Full specification text.
        section: Section keyword; it is inserted into the regex verbatim.

    Returns:
        The list of entries, or ``None`` when the section is not found.
    """
    pattern = re.compile(
        r"^" + section + r"\s*\n(.*?)^\s*end\s+" + section + r"\b",
        re.MULTILINE | re.DOTALL,
    )
    uncommented = "\n".join(
        line for line in txt.split("\n") if not line.lstrip().startswith("--")
    )
    match = pattern.search(uncommented)
    if match is None:
        return None
    return [
        normalize(x.strip()).replace("\n", "")
        for x in match.group(1).split(";")
        if x.strip()
    ]


def extract_agents(txt: str) -> dict:
    """
    Extract every ``Agent <name> ... end Agent`` block and split it up.

    Args:
        txt: Full specification text.

    Returns:
        A dict mapping each agent name to a dict with the keys

        * ``lobsvars`` / ``actions``: text between the braces of
          ``Lobsvars = {...};`` / ``Actions = {...};`` with space runs
          collapsed, or ``""`` when absent;
        * ``protocol`` / ``vars`` / ``evolution``: list of the stripped,
          non-blank ``;``-separated pieces of the ``<Name>: ... end <Name>``
          block, with tabs and ``--`` comment lines removed;
        * ``obsvars``: same shape, but only produced for an agent named
          ``Environment`` or ``environment``.

        An empty list-valued section is logged: at debug level for the
        environment's ``obsvars``/``evolution``, as a warning otherwise.
    """
    agent_re = re.compile(
        r"^Agent\s+(\w+)\s*\n(.*?)^\s*end\s+Agent\b", re.MULTILINE | re.DOTALL
    )
    # Raw body of every agent; a repeated agent name keeps the last body.
    bodies = {m.group(1): m.group(2) for m in agent_re.finditer(txt)}

    agents = {}
    for agent, block in bodies.items():
        sections = agents[agent] = {}
        is_environment = agent in ("Environment", "environment")
        LOGGER.debug(f"parsing agent={agent}")

        # Brace-delimited declarations: kept as a single normalized string.
        for sub in ["Lobsvars", "Actions"]:
            match = re.search(
                rf"\s*{sub}\s*" + r"=\s*\{([\s\S]*?)\};",
                block,
                re.MULTILINE | re.DOTALL,
            )
            sections[sub.lower()] = normalize(match.group(1).strip() if match else "")

        # "<Name>: ... end <Name>" sections: kept as a list of statements.
        for sub in ["Protocol", "Vars", "Obsvars", "Evolution"]:
            if sub == "Obsvars" and not is_environment:
                continue
            section = sub.lower()
            sub_block = extract_block(
                block,
                name=sub,
                pattern=rf"{sub}:\s*\n(.*?)^\s*end\s+{sub}\b",
                value_only=True,
            )
            sub_block = sub_block.replace("\t", "")
            sub_block = "\n".join(
                line
                for line in sub_block.split("\n")
                if not line.lstrip().startswith("--")
            )
            pieces = normalize(sub_block).split(";")
            sections[section] = [x.strip() for x in pieces if x.strip()]
            if sections[section]:
                continue
            if is_environment and section in ("obsvars", "evolution"):
                LOGGER.debug(f"skipping section `{agent}.{section}` (missing ok)")
            else:
                LOGGER.warning(
                    f"could not extract {agent}.{section} from block:\n{block}\n\n{pieces}"
                )
    return agents


def parser(txt, strict=False, file: Optional[str] = None):
    """
    Parse specification text into an ``ISPL`` object from ``mcmas.ispl``.

    NB: ``file`` is purely informational (it is only stored in the result's
    metadata), only txt is used.  ``strict`` is currently not consulted.
    """
    from mcmas import ispl as ns

    # A leading "--" comment line doubles as the title of the spec.
    first_line = txt.lstrip().split("\n")[0]
    title = "untitled spec"
    if first_line.startswith("--"):
        title = first_line.replace("--", "").strip() or title

    agents = extract_agents(txt)
    environment = agents.pop("Environment", {})
    # Instantiated for its side effects only; the plain dict is what gets
    # handed to ISPL below.
    ns.Environment(**environment)
    agents = {k: ns.Agent(**v) for k, v in agents.items()}
    toplevel = {
        keyword: extract_toplevel(txt, section=section)
        for keyword, section in (
            ("formulae", "Formulae"),
            ("groups", "Groups"),
            ("evaluation", "Evaluation"),
            ("init_states", "InitStates"),
        )
    }
    return ns.ISPL(
        metadata={"file": file, "parser": __name__},
        title=title,
        environment=environment,
        agents=agents,
        **toplevel,
    )


parse = parser
LOGGER =
<Logger mcmas.parser (INFO)>
def
extract_witnesses(text: str, fname: str = ''):
def extract_witnesses(text: str, fname: str = ""):  # -> typing.List[dict]
    """
    Parse agent state text into a list of dictionaries, one per state.

    The text is split on the literal marker ``-- State ``.  Inside each
    chunk an ``Agent <name>`` line opens a namespace, and every following
    indented ``key=value`` line is stored as ``"<name>.<key>": "<value>"``.
    Captures ALL key=value pairs, not just 'state'.

    Args:
        text (str): Input text with Agent sections and indented key=value pairs
        fname (str): Label that is only used in the log message

    Returns:
        list: List of dictionaries with all namespaced properties.  Chunks
        that yield no pairs are omitted; values are kept as strings.
    """
    # The header is accepted at any indentation: such a line contains a space
    # and no "=", so it can never be mistaken for a key=value line.
    agent_re = re.compile(r"^\s*Agent\s+(\w+)$")
    kv_re = re.compile(r"^\s+(\w+)=(\w+)$")

    states = [x.strip() for x in text.strip().split("-- State ") if x.strip()]
    LOGGER.info(f"decoded {len(states)} states from {fname}")
    output = []
    for state in states:
        props = {}
        current_namespace = None
        for line in state.split("\n"):
            # Skip blank lines and "--" comment/separator lines.
            if not line.strip() or line.lstrip().startswith("--"):
                continue
            agent_match = agent_re.match(line)
            if agent_match:
                current_namespace = agent_match.group(1)
                continue
            kv_match = kv_re.match(line)
            # Pairs that appear before any Agent header are ignored.
            if kv_match and current_namespace:
                # A repeated key overrides the earlier value.
                props[f"{current_namespace}.{kv_match.group(1)}"] = kv_match.group(2)
        if props:
            output.append(props)
    return output
Parse agent state text into a list of dictionaries. Captures ALL key=value pairs, not just 'state'.
Args: text (str): Input text with Agent sections and indented key=value pairs
Returns: list: List of dictionaries with all namespaced properties
def
extract_block( text: str, pattern=None, block_start: str = '^Agent', block_end: str = 'end\\s+Agent\\b', name=None, value_only: bool = False) -> dict:
def extract_block(
    text: str,
    pattern=None,
    block_start: str = "^Agent",
    block_end: str = r"end\s+Agent\b",
    name=None,
    value_only: bool = False,
) -> dict:
    """
    Extract regex-delimited blocks from ``text``.

    Args:
        text: Text to search.
        pattern: Regex source string.  When falsy, a pattern is built from
            ``block_start`` and ``block_end`` whose group 1 is the word after
            ``block_start`` and whose group 2 is the block body.
        block_start: Regex that opens a block (default pattern only).
        block_end: Regex that closes a block (default pattern only).
        name: Key under which captured text is stored.  When ``None`` every
            match is stored under its own group 1.
        value_only: Return only the text of the last match instead of a dict.

    Returns:
        A dict mapping keys to captured text, or -- when ``value_only`` is
        true -- the captured text of the last match (``""`` if no match).
        With an explicit ``name`` the captured text is group 1; otherwise it
        is group 2 (group 1 if the pattern defines a single group only).
    """
    if not pattern:
        pattern = block_start + r"\s+(\w+)\s*\n(.*?)^\s*" + block_end
    pattern = re.compile(pattern, re.MULTILINE | re.DOTALL)
    # Decide once, before the loop, so that every match gets its own key and
    # the body group is not confused with the name group.
    keyed_by_match = name is None
    body_group = 2 if keyed_by_match and pattern.groups >= 2 else 1
    out = {}
    block = ""
    for match in pattern.finditer(text):
        key = match.group(1) if keyed_by_match else name
        block = match.group(body_group)
        out[key] = block
    return block if value_only else out
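Extract regex-delimited blocks from text. Returns a dict of the captured blocks, or only the text captured by the last match when value_only is true.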
def
normalize(x):
Collapse every run of consecutive spaces in x into a single space.
def
extract_toplevel(txt: str, section='Formulae') -> list:
def extract_toplevel(txt: str, section="Formulae") -> Optional[list]:
    """
    Extract the entries of a top-level ``<section> ... end <section>`` block.

    Lines whose first non-blank characters are ``--`` are dropped before
    matching.  The body of the first matching block is split on ``;``; each
    non-blank piece is stripped, has its space runs collapsed and its
    newlines removed.

    Args:
        txt: Full specification text.
        section: Section keyword; it is inserted into the regex verbatim.

    Returns:
        The list of entries, or ``None`` when the section is not found.
    """
    pattern = re.compile(
        r"^" + section + r"\s*\n(.*?)^\s*end\s+" + section + r"\b",
        re.MULTILINE | re.DOTALL,
    )
    uncommented = "\n".join(
        line for line in txt.split("\n") if not line.lstrip().startswith("--")
    )
    match = pattern.search(uncommented)
    if match is None:
        return None
    return [
        normalize(x.strip()).replace("\n", "")
        for x in match.group(1).split(";")
        if x.strip()
    ]
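Extract the ";"-separated entries of the first top-level "<section> ... end <section>" block, ignoring "--" comment lines. Returns None when the section is not found.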
def
extract_agents(txt: str) -> dict:
def extract_agents(txt: str) -> dict:
    """
    Extract every ``Agent <name> ... end Agent`` block and split it up.

    Args:
        txt: Full specification text.

    Returns:
        A dict mapping each agent name to a dict with the keys

        * ``lobsvars`` / ``actions``: text between the braces of
          ``Lobsvars = {...};`` / ``Actions = {...};`` with space runs
          collapsed, or ``""`` when absent;
        * ``protocol`` / ``vars`` / ``evolution``: list of the stripped,
          non-blank ``;``-separated pieces of the ``<Name>: ... end <Name>``
          block, with tabs and ``--`` comment lines removed;
        * ``obsvars``: same shape, but only produced for an agent named
          ``Environment`` or ``environment``.

        An empty list-valued section is logged: at debug level for the
        environment's ``obsvars``/``evolution``, as a warning otherwise.
    """
    agent_re = re.compile(
        r"^Agent\s+(\w+)\s*\n(.*?)^\s*end\s+Agent\b", re.MULTILINE | re.DOTALL
    )
    # Raw body of every agent; a repeated agent name keeps the last body.
    bodies = {m.group(1): m.group(2) for m in agent_re.finditer(txt)}

    agents = {}
    for agent, block in bodies.items():
        sections = agents[agent] = {}
        is_environment = agent in ("Environment", "environment")
        LOGGER.debug(f"parsing agent={agent}")

        # Brace-delimited declarations: kept as a single normalized string.
        for sub in ["Lobsvars", "Actions"]:
            match = re.search(
                rf"\s*{sub}\s*" + r"=\s*\{([\s\S]*?)\};",
                block,
                re.MULTILINE | re.DOTALL,
            )
            sections[sub.lower()] = normalize(match.group(1).strip() if match else "")

        # "<Name>: ... end <Name>" sections: kept as a list of statements.
        for sub in ["Protocol", "Vars", "Obsvars", "Evolution"]:
            if sub == "Obsvars" and not is_environment:
                continue
            section = sub.lower()
            sub_block = extract_block(
                block,
                name=sub,
                pattern=rf"{sub}:\s*\n(.*?)^\s*end\s+{sub}\b",
                value_only=True,
            )
            sub_block = sub_block.replace("\t", "")
            sub_block = "\n".join(
                line
                for line in sub_block.split("\n")
                if not line.lstrip().startswith("--")
            )
            pieces = normalize(sub_block).split(";")
            sections[section] = [x.strip() for x in pieces if x.strip()]
            if sections[section]:
                continue
            if is_environment and section in ("obsvars", "evolution"):
                LOGGER.debug(f"skipping section `{agent}.{section}` (missing ok)")
            else:
                LOGGER.warning(
                    f"could not extract {agent}.{section} from block:\n{block}\n\n{pieces}"
                )
    return agents
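Extract every "Agent <name> ... end Agent" block and split it into its lobsvars, actions, protocol, vars, obsvars (Environment only) and evolution sections, returned as a dict keyed by agent name.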
def
parser(txt, strict=False, file: Optional[str] = None):
def parser(txt, strict=False, file: Optional[str] = None):
    """
    Parse specification text into an ``ISPL`` object from ``mcmas.ispl``.

    NB: ``file`` is purely informational (it is only stored in the result's
    metadata), only txt is used.  ``strict`` is currently not consulted.
    """
    from mcmas import ispl as ns

    # A leading "--" comment line doubles as the title of the spec.
    first_line = txt.lstrip().split("\n")[0]
    title = "untitled spec"
    if first_line.startswith("--"):
        title = first_line.replace("--", "").strip() or title

    agents = extract_agents(txt)
    environment = agents.pop("Environment", {})
    # Instantiated for its side effects only; the plain dict is what gets
    # handed to ISPL below.
    ns.Environment(**environment)
    agents = {k: ns.Agent(**v) for k, v in agents.items()}
    toplevel = {
        keyword: extract_toplevel(txt, section=section)
        for keyword, section in (
            ("formulae", "Formulae"),
            ("groups", "Groups"),
            ("evaluation", "Evaluation"),
            ("init_states", "InitStates"),
        )
    }
    return ns.ISPL(
        metadata={"file": file, "parser": __name__},
        title=title,
        environment=environment,
        agents=agents,
        **toplevel,
    )
NB: file is purely informational, only txt is used.
def
parse(txt, strict=False, file: Optional[str] = None):
def parser(txt, strict=False, file: Optional[str] = None):
    """
    Parse specification text into an ``ISPL`` object from ``mcmas.ispl``.

    NB: ``file`` is purely informational (it is only stored in the result's
    metadata), only txt is used.  ``strict`` is currently not consulted.
    """
    from mcmas import ispl as ns

    # A leading "--" comment line doubles as the title of the spec.
    first_line = txt.lstrip().split("\n")[0]
    title = "untitled spec"
    if first_line.startswith("--"):
        title = first_line.replace("--", "").strip() or title

    agents = extract_agents(txt)
    environment = agents.pop("Environment", {})
    # Instantiated for its side effects only; the plain dict is what gets
    # handed to ISPL below.
    ns.Environment(**environment)
    agents = {k: ns.Agent(**v) for k, v in agents.items()}
    toplevel = {
        keyword: extract_toplevel(txt, section=section)
        for keyword, section in (
            ("formulae", "Formulae"),
            ("groups", "Groups"),
            ("evaluation", "Evaluation"),
            ("init_states", "InitStates"),
        )
    }
    return ns.ISPL(
        metadata={"file": file, "parser": __name__},
        title=title,
        environment=environment,
        agents=agents,
        **toplevel,
    )
NB: file is purely informational, only txt is used.