mcmas.parser

View Source

  1"""
  2
  3"""
  4
  5import re
  6from typing import Optional
  7
  8from mcmas import util
  9
 10LOGGER = util.get_logger(__name__)
 11
 12
 13def extract_witnesses(text: str, fname: str = ""):  # -> typing.Dict:
 14    """
 15    Parse agent state text into a list of dictionaries. Captures
 16    ALL key=value pairs, not just 'state'.
 17
 18    Args:
 19        text (str): Input text with Agent sections and indented key=value pairs
 20
 21    Returns:
 22        list: List of dictionaries with all namespaced properties
 23    """
 24    states = [x.strip() for x in text.strip().split("-- State ") if x.strip()]
 25    LOGGER.info(f"decoded {len(states)} states from {fname}")
 26    output = []
 27    for state in states:
 28        result = []
 29        current_namespace = None
 30        lines = [x for x in state.strip().split("\n") if x.strip()][1:]
 31        lines = state.strip().split("\n")
 32        for line in lines:
 33            if not line.strip() or line.lstrip().startswith("--"):
 34                continue
 35            # Agent section header
 36            agent_match = re.match(r"^  Agent\s+(\w+)$", line)
 37            if agent_match:
 38                current_namespace = agent_match.group(1)
 39                continue
 40            # Indented key=value pairs
 41            kv_match = re.match(r"^\s+(\w+)=(\w+)$", line)
 42            if kv_match and current_namespace:
 43                key = kv_match.group(1)
 44                value = kv_match.group(2)
 45                result.append({f"{current_namespace}.{key}": value})
 46        # raise Exception(result)
 47        tmp = {}
 48        for item in result:
 49            # result = dict([k,v] for k,v in result)
 50            tmp.update(item)
 51        if tmp:
 52            output += [tmp]
 53    return output
 54
 55
 56def extract_block(
 57    text: str,
 58    pattern=None,
 59    block_start: str = "^Agent",
 60    block_end: str = r"end\s+Agent\b",
 61    name=None,
 62    value_only: bool = False,
 63) -> dict:
 64    """
 65    Extract ...
 66    """
 67    if not pattern:
 68        pattern = block_start + r"\s+(\w+)\s*\n(.*?)^\s*" + block_end
 69    pattern = re.compile(pattern, re.MULTILINE | re.DOTALL)
 70    out = {}
 71    block = ""
 72    for match in pattern.finditer(text):
 73        name = match.group(1) if name is None else name
 74        block = match.group(2) if name is None else match.group(1)
 75        # Split into lines, strip trailing newline
 76        # lines = [line.rstrip() for line in block.strip('\n').splitlines()]
 77        out[name] = block  # lines
 78    if value_only:
 79        return block
 80    else:
 81        return out
 82
 83
 84def normalize(x):
 85    """
 86    Extract ...
 87    """
 88    return re.sub(r" +", " ", x)
 89
 90
 91# def extract_formula(txt: str, section="Formulae") -> list:
 92#     return extract_toplevel(txt, section=section)
 93
 94
 95def extract_toplevel(txt: str, section="Formulae") -> list:
 96    """
 97    Extract ...
 98    """
 99    pattern = re.compile(
100        r"^" + section + r"\s*\n(.*?)^\s*end\s+" + section + r"\b",
101        re.MULTILINE | re.DOTALL,
102    )
103    txt = [x for x in txt.split("\n") if not x.lstrip().startswith("--")]
104    txt = "\n".join(txt)
105    for match in pattern.finditer(txt):
106        if match:
107            return [
108                normalize(x.strip()).replace("\n", "")
109                for x in match.group(1).split(";")
110                if x.strip()
111            ]
112
113
def extract_agents(txt: str) -> dict:
    """
    Split ``txt`` into agents and parse the sections of each agent.

    An agent is the text between a line starting with ``Agent <name>`` and
    the following ``end Agent``. For every agent the result contains:

    * ``lobsvars`` and ``actions``: the text inside ``<Section> = { ... };``
      with runs of spaces collapsed, or ``""`` when the section is absent.
    * ``protocol``, ``vars`` and ``evolution`` (plus ``obsvars``, but only
      for an agent named ``Environment`` or ``environment``): the body of
      ``<Section>: ... end <Section>`` with tabs and ``--`` comment lines
      removed, split on ``;`` into a list of stripped, non-empty strings.

    An empty list section is logged as a warning, except for the
    environment's ``obsvars`` and ``evolution``, which are logged at debug
    level only.

    Args:
        txt (str): Text to scan

    Returns:
        dict: ``{agent_name: {section_name: value}}``
    """
    agent_re = re.compile(
        r"^Agent\s+(\w+)\s*\n(.*?)^\s*end\s+Agent\b", re.MULTILINE | re.DOTALL
    )
    # Raw body per agent; when a name occurs twice the later body wins.
    bodies = {}
    for found in agent_re.finditer(txt):
        bodies[found.group(1)] = found.group(2)

    agents = {}
    for agent, block in bodies.items():
        LOGGER.debug(f"parsing agent={agent}")
        is_environment = agent in ("Environment", "environment")
        sections = {}

        # "<Section> = { ... };" assignments are kept as one string.
        for sub in ("Lobsvars", "Actions"):
            assignment_re = re.compile(
                rf"\s*{sub}\s*" + r"=\s*\{([\s\S]*?)\};",
                re.MULTILINE | re.DOTALL,
            )
            found = assignment_re.search(block)
            sub_block = found.group(1).strip() if found else ""
            sections[sub.lower()] = normalize(sub_block)

        # "<Section>: ... end <Section>" bodies become lists of statements.
        for sub in ("Protocol", "Vars", "Obsvars", "Evolution"):
            section = sub.lower()
            # Obsvars is only looked up on the environment agent.
            if sub == "Obsvars" and not is_environment:
                continue
            sub_block = extract_block(
                block,
                name=sub,
                pattern=rf"{sub}:\s*\n(.*?)^\s*end\s+{sub}\b",
                value_only=True,
            )
            sub_block = sub_block.replace("\t", "")
            sub_block = "\n".join(
                line
                for line in sub_block.split("\n")
                if not line.lstrip().startswith("--")
            )
            sub_block = normalize(sub_block).split(";")
            sections[section] = [x.strip() for x in sub_block if x.strip()]
            if not sections[section]:
                if is_environment and section in ("obsvars", "evolution"):
                    LOGGER.debug(f"skipping section `{agent}.{section}` (missing ok)")
                else:
                    LOGGER.warning(
                        f"could not extract {agent}.{section} from block:\n{block}\n\n{sub_block}"
                    )
        agents[agent] = sections
    return agents
191
192
193# @validate_call
def parser(txt, strict=False, file: Optional[str] = None):
    """
    Build an ``ISPL`` object from specification text.

    NB: ``file`` is purely informational (it is only stored in the
    metadata); only ``txt`` is parsed. ``strict`` is accepted but not used
    by this function.

    The title is the first line of ``txt`` (after leading whitespace) when
    that line starts with ``--``, with every ``--`` removed and the rest
    stripped; otherwise, or when nothing remains, it is "untitled spec".
    """
    from mcmas import ispl as ns

    first_line = txt.lstrip().split("\n")[0]
    title = ""
    if first_line.startswith("--"):
        title = first_line.replace("--", "").strip()
    if not title:
        title = "untitled spec"

    agents = extract_agents(txt)
    environment = agents.pop("Environment", {})
    # The instance is discarded and the raw dict is what gets passed on.
    # NOTE(review): presumably this call only validates the fields - confirm.
    ns.Environment(**environment)
    agent_models = {}
    for agent_name, fields in agents.items():
        agent_models[agent_name] = ns.Agent(**fields)
    return ns.ISPL(
        metadata={"file": file, "parser": f"{__name__}"},
        title=title,
        environment=environment,
        agents=agent_models,
        formulae=extract_toplevel(txt, section="Formulae"),
        groups=extract_toplevel(txt, section="Groups"),
        evaluation=extract_toplevel(txt, section="Evaluation"),
        init_states=extract_toplevel(txt, section="InitStates"),
    )


parse = parser

LOGGER = <Logger mcmas.parser (INFO)>

def extract_witnesses(text: str, fname: str = ''): View Source

def extract_witnesses(text: str, fname: str = "") -> list:
    """
    Parse agent state text into a list of dictionaries. Captures
    ALL key=value pairs, not just 'state'.

    The text is split on the literal marker ``"-- State "``. Inside each
    chunk, a line of the form ``"  Agent <name>"`` (exactly two leading
    spaces) opens a namespace, and every following indented ``key=value``
    line is stored as ``"<name>.<key>": "<value>"``. Pairs that appear
    before any Agent header are ignored.

    Args:
        text (str): Input text with Agent sections and indented key=value pairs
        fname (str): Label that is only interpolated into the log message

    Returns:
        list: One dictionary of namespaced properties per chunk that
            yielded at least one pair; chunks without pairs are omitted
    """
    states = [x.strip() for x in text.strip().split("-- State ") if x.strip()]
    LOGGER.info(f"decoded {len(states)} states from {fname}")
    output = []
    for state in states:
        properties = {}
        current_namespace = None
        for line in state.split("\n"):
            # Blank lines and "--" marker/comment lines carry no data.
            if not line.strip() or line.lstrip().startswith("--"):
                continue
            # Agent section header
            agent_match = re.match(r"^  Agent\s+(\w+)$", line)
            if agent_match:
                current_namespace = agent_match.group(1)
                continue
            # Indented key=value pairs
            kv_match = re.match(r"^\s+(\w+)=(\w+)$", line)
            if kv_match and current_namespace:
                key, value = kv_match.groups()
                # A repeated key within one chunk keeps its last value.
                properties[f"{current_namespace}.{key}"] = value
        if properties:
            output.append(properties)
    return output

Parse agent state text into a list of dictionaries. Captures ALL key=value pairs, not just 'state'.

Args: text (str): Input text with Agent sections and indented key=value pairs

Returns: list: List of dictionaries with all namespaced properties

def extract_block( text: str, pattern=None, block_start: str = '^Agent', block_end: str = 'end\\s+Agent\\b', name=None, value_only: bool = False) -> dict: View Source

def extract_block(
    text: str,
    pattern=None,
    block_start: str = "^Agent",
    block_end: str = r"end\s+Agent\b",
    name=None,
    value_only: bool = False,
) -> dict:
    """
    Extract delimited blocks from ``text`` with a regular expression.

    Without ``pattern`` the expression is built from ``block_start`` and
    ``block_end`` and has two groups: the block name and the block body.
    The pattern is always compiled with ``re.MULTILINE | re.DOTALL``.

    Args:
        text (str): Text to search
        pattern: Optional regular expression string replacing the default
        block_start (str): Regex fragment opening a block (default pattern only)
        block_end (str): Regex fragment closing a block (default pattern only)
        name: When given, every match is stored under this key and the
            body is taken from group 1 of the pattern. When ``None``, the
            key is group 1 and the body is group 2.
        value_only (bool): Return only the body of the last match

    Returns:
        dict: ``{key: body}`` for all matches (``{}`` when nothing matches).
            With ``value_only=True`` a ``str`` is returned instead: the
            body of the last match, or ``""`` when nothing matches.
    """
    if not pattern:
        pattern = block_start + r"\s+(\w+)\s*\n(.*?)^\s*" + block_end
    pattern = re.compile(pattern, re.MULTILINE | re.DOTALL)
    # A custom pattern may define a single group only; then that group is
    # the only text available and serves as both key and body.
    has_body_group = pattern.groups >= 2
    out = {}
    block = ""
    for match in pattern.finditer(text):
        # The key is computed per match. Rebinding ``name`` here would make
        # every later match reuse the first key and would pick the name
        # group as the body.
        if name is not None:
            key, block = name, match.group(1)
        elif has_body_group:
            key, block = match.group(1), match.group(2)
        else:
            key = block = match.group(1)
        out[key] = block
    if value_only:
        return block
    return out

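Extract blocks of text delimited by a start/end regular expression (by default `Agent <name>` ... `end Agent`). Returns a dict of the matches, or only the text of the last match when value_only is true.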

def normalize(x): View Source

def normalize(x):
    """
    Collapse each run of consecutive space characters in ``x`` into a
    single space. Only the space character is affected; tabs and newlines
    are left as they are.
    """
    # Runs of length one are already in their final form, so only runs of
    # two or more spaces need replacing.
    space_runs = re.compile(r" {2,}")
    return space_runs.sub(" ", x)

Collapse each run of consecutive spaces in x into a single space.

def extract_toplevel(txt: str, section='Formulae') -> list: View Source

 96def extract_toplevel(txt: str, section="Formulae") -> list:
 97    """
 98    Extract ...
 99    """
100    pattern = re.compile(
101        r"^" + section + r"\s*\n(.*?)^\s*end\s+" + section + r"\b",
102        re.MULTILINE | re.DOTALL,
103    )
104    txt = [x for x in txt.split("\n") if not x.lstrip().startswith("--")]
105    txt = "\n".join(txt)
106    for match in pattern.finditer(txt):
107        if match:
108            return [
109                normalize(x.strip()).replace("\n", "")
110                for x in match.group(1).split(";")
111                if x.strip()
112            ]

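Extract the first top-level `<section>` ... `end <section>` block from txt, ignoring `--` comment lines, and return its `;`-separated entries as a list (None when the section is absent).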

def extract_agents(txt: str) -> dict: View Source

def extract_agents(txt: str) -> dict:
    """
    Split ``txt`` into agents and parse the sections of each agent.

    An agent is the text between a line starting with ``Agent <name>`` and
    the following ``end Agent``. For every agent the result contains:

    * ``lobsvars`` and ``actions``: the text inside ``<Section> = { ... };``
      with runs of spaces collapsed, or ``""`` when the section is absent.
    * ``protocol``, ``vars`` and ``evolution`` (plus ``obsvars``, but only
      for an agent named ``Environment`` or ``environment``): the body of
      ``<Section>: ... end <Section>`` with tabs and ``--`` comment lines
      removed, split on ``;`` into a list of stripped, non-empty strings.

    An empty list section is logged as a warning, except for the
    environment's ``obsvars`` and ``evolution``, which are logged at debug
    level only.

    Args:
        txt (str): Text to scan

    Returns:
        dict: ``{agent_name: {section_name: value}}``
    """
    agent_re = re.compile(
        r"^Agent\s+(\w+)\s*\n(.*?)^\s*end\s+Agent\b", re.MULTILINE | re.DOTALL
    )
    # Raw body per agent; when a name occurs twice the later body wins.
    bodies = {}
    for found in agent_re.finditer(txt):
        bodies[found.group(1)] = found.group(2)

    agents = {}
    for agent, block in bodies.items():
        LOGGER.debug(f"parsing agent={agent}")
        is_environment = agent in ("Environment", "environment")
        sections = {}

        # "<Section> = { ... };" assignments are kept as one string.
        for sub in ("Lobsvars", "Actions"):
            assignment_re = re.compile(
                rf"\s*{sub}\s*" + r"=\s*\{([\s\S]*?)\};",
                re.MULTILINE | re.DOTALL,
            )
            found = assignment_re.search(block)
            sub_block = found.group(1).strip() if found else ""
            sections[sub.lower()] = normalize(sub_block)

        # "<Section>: ... end <Section>" bodies become lists of statements.
        for sub in ("Protocol", "Vars", "Obsvars", "Evolution"):
            section = sub.lower()
            # Obsvars is only looked up on the environment agent.
            if sub == "Obsvars" and not is_environment:
                continue
            sub_block = extract_block(
                block,
                name=sub,
                pattern=rf"{sub}:\s*\n(.*?)^\s*end\s+{sub}\b",
                value_only=True,
            )
            sub_block = sub_block.replace("\t", "")
            sub_block = "\n".join(
                line
                for line in sub_block.split("\n")
                if not line.lstrip().startswith("--")
            )
            sub_block = normalize(sub_block).split(";")
            sections[section] = [x.strip() for x in sub_block if x.strip()]
            if not sections[section]:
                if is_environment and section in ("obsvars", "evolution"):
                    LOGGER.debug(f"skipping section `{agent}.{section}` (missing ok)")
                else:
                    LOGGER.warning(
                        f"could not extract {agent}.{section} from block:\n{block}\n\n{sub_block}"
                    )
        agents[agent] = sections
    return agents

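Extract every `Agent <name>` ... `end Agent` block from txt and parse its Lobsvars, Actions, Protocol, Vars, Obsvars (Environment only) and Evolution sections into a dict keyed by agent name.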

def parser(txt, strict=False, file: Optional[str] = None): View Source

def parser(txt, strict=False, file: Optional[str] = None):
    """
    Build an ``ISPL`` object from specification text.

    NB: ``file`` is purely informational (it is only stored in the
    metadata); only ``txt`` is parsed. ``strict`` is accepted but not used
    by this function.

    The title is the first line of ``txt`` (after leading whitespace) when
    that line starts with ``--``, with every ``--`` removed and the rest
    stripped; otherwise, or when nothing remains, it is "untitled spec".
    """
    from mcmas import ispl as ns

    first_line = txt.lstrip().split("\n")[0]
    title = ""
    if first_line.startswith("--"):
        title = first_line.replace("--", "").strip()
    if not title:
        title = "untitled spec"

    agents = extract_agents(txt)
    environment = agents.pop("Environment", {})
    # The instance is discarded and the raw dict is what gets passed on.
    # NOTE(review): presumably this call only validates the fields - confirm.
    ns.Environment(**environment)
    agent_models = {}
    for agent_name, fields in agents.items():
        agent_models[agent_name] = ns.Agent(**fields)
    return ns.ISPL(
        metadata={"file": file, "parser": f"{__name__}"},
        title=title,
        environment=environment,
        agents=agent_models,
        formulae=extract_toplevel(txt, section="Formulae"),
        groups=extract_toplevel(txt, section="Groups"),
        evaluation=extract_toplevel(txt, section="Evaluation"),
        init_states=extract_toplevel(txt, section="InitStates"),
    )

NB: file is purely informational (it is only recorded in the metadata); only txt is parsed.

def parse(txt, strict=False, file: Optional[str] = None): View Source

def parser(txt, strict=False, file: Optional[str] = None):
    """
    Build an ``ISPL`` object from specification text.

    NB: ``file`` is purely informational (it is only stored in the
    metadata); only ``txt`` is parsed. ``strict`` is accepted but not used
    by this function.

    The title is the first line of ``txt`` (after leading whitespace) when
    that line starts with ``--``, with every ``--`` removed and the rest
    stripped; otherwise, or when nothing remains, it is "untitled spec".
    """
    from mcmas import ispl as ns

    first_line = txt.lstrip().split("\n")[0]
    title = ""
    if first_line.startswith("--"):
        title = first_line.replace("--", "").strip()
    if not title:
        title = "untitled spec"

    agents = extract_agents(txt)
    environment = agents.pop("Environment", {})
    # The instance is discarded and the raw dict is what gets passed on.
    # NOTE(review): presumably this call only validates the fields - confirm.
    ns.Environment(**environment)
    agent_models = {}
    for agent_name, fields in agents.items():
        agent_models[agent_name] = ns.Agent(**fields)
    return ns.ISPL(
        metadata={"file": file, "parser": f"{__name__}"},
        title=title,
        environment=environment,
        agents=agent_models,
        formulae=extract_toplevel(txt, section="Formulae"),
        groups=extract_toplevel(txt, section="Groups"),
        evaluation=extract_toplevel(txt, section="Evaluation"),
        init_states=extract_toplevel(txt, section="InitStates"),
    )

NB: file is purely informational (it is only recorded in the metadata); only txt is parsed.