Skip to content

Commit 22a2973

Browse files
committed
got llm querying working on container
1 parent 7e69e24 commit 22a2973

12 files changed

Lines changed: 6683 additions & 0 deletions

llm-querying/__init__.py

Whitespace-only changes.

llm-querying/agent.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
# make a langchain graph instance with the model
2+
from typing_extensions import TypedDict, List, Annotated, Literal
3+
from dataset_and_llm import llm
4+
from prompts import make_prompt, FLOPCounts
5+
from io_cost import get_query_cost
6+
import operator
7+
from langgraph.graph import StateGraph, END
8+
from langgraph.checkpoint.sqlite import SqliteSaver
9+
from langchain.schema import AIMessage
10+
from configuration import Configuration
11+
import sqlite3
12+
13+
class BaselineQueryState(TypedDict, total=False):
    """State carried through the baseline FLOP-count query graph.

    total=False: every key is optional, so each graph node may return a
    partial update; keys annotated with operator.add are merged by list
    concatenation across node updates.
    """
    # --- input problem description (filled in by get_input_problem) ---
    source_code: str
    combined_name: str
    kernel_name: str
    exec_args: str
    grid_size: str  # "(x, y, z)" string, as stored in the dataset
    block_size: str  # "(x, y, z)" string
    total_num_threads: str
    compile_commands: str

    # ground-truth measurements; never shown to the LLM, used only to score
    # how close the LLM prediction is to reality
    empirical_sp_flop_count: float
    empirical_dp_flop_count: float

    prompt_type: Literal["simple", "full"]

    # raw LLM response messages, accumulated across queries
    raw_flop_counts: Annotated[List[AIMessage], operator.add]

    # parsed predictions from the structured LLM output
    predicted_sp_flop_count: int
    predicted_dp_flop_count: int
    predicted_sp_flop_count_explanation: str
    predicted_dp_flop_count_explanation: str

    # token/cost bookkeeping, list-concatenated so totals can be summed later
    input_tokens: Annotated[List[int], operator.add]
    output_tokens: Annotated[List[int], operator.add]
    total_cost: Annotated[List[float], operator.add]

    total_query_time: Annotated[List[float], operator.add]
    # accumulated error messages from failed steps
    error: Annotated[List[str], operator.add]
41+
42+
# Calculate the total number of threads from the gridSz and the blockSz
# grid size is a string of format "(x, y, z)"
# block size is a string of format "(x, y, z)"
def calc_total_threads(gridSz: str, blockSz: str) -> str:
    """Return the total thread count (grid volume * block volume) as a string.

    Both arguments are "(x, y, z)" tuple literals serialized as strings in
    the dataset CSV.
    """
    # Local imports keep the module-level import block untouched.
    import ast
    import math

    # FIX: ast.literal_eval instead of eval — parses the tuple literal
    # without executing arbitrary code coming from the dataset file.
    grid = ast.literal_eval(gridSz)
    block = ast.literal_eval(blockSz)
    total_threads = math.prod(grid) * math.prod(block)
    return str(total_threads)
50+
51+
def get_input_problem(state: BaselineQueryState, config):
    """Graph node 0: pull the target kernel's dataset row out of the run
    configuration and seed the graph state with it.

    Returns a partial BaselineQueryState. The empirical_* values are kept
    only for scoring and are never passed to the LLM.

    Raises:
        ValueError: if no 'input_problem_row' was supplied in the config.
    """
    configurable = config.get("configurable", {})
    verbose = configurable.get("verbose_printing", False)

    row = configurable.get("input_problem_row", None)

    prompt_type = configurable.get("prompt_type", "simple")

    # FIX: validate BEFORE indexing the row. The original read
    # row['combined_name'] first, so a missing row raised TypeError and the
    # assert was unreachable (and `assert` is stripped under `python -O`).
    if row is None:
        raise ValueError("Target problem not found: no 'input_problem_row' in the configuration.")

    combined_name = row['combined_name']

    if verbose:
        print("---------- BEGIN STEP 0: GET INPUT PROBLEM ----------", flush=True)

    to_return = {'source_code' : row['source_code'],
                 'combined_name' : combined_name,
                 'kernel_name' : row['Kernel Name'],
                 'exec_args' : row['exeArgs'],
                 'grid_size' : row['Grid Size'],
                 'block_size' : row['Block Size'],
                 'total_num_threads' : calc_total_threads(row['Grid Size'], row['Block Size']),
                 'compile_commands' : row['compile_commands'],
                 # these "true" values do not get passed to the LLMs
                 # they are used to calculate how close the LLM prediction is to the ground-truth
                 'empirical_sp_flop_count' : row['SP_FLOP'],
                 'empirical_dp_flop_count' : row['DP_FLOP'],
                 'prompt_type' : prompt_type
                 }

    if verbose:
        # source_code is large; print everything else for debugging
        for k, v in to_return.items():
            if k != "source_code":
                print(f"\t{k}: {v}", flush=True)
        print("---------- END STEP 0: GET INPUT PROBLEM ----------", flush=True)

    return to_return
87+
88+
89+
def query_for_flop_count(state: BaselineQueryState, config):
    """Graph node 1: ask the LLM for SP/DP FLOP counts of the kernel.

    Builds a prompt from the state, invokes the configured LLM with
    structured output (FLOPCounts), and returns a partial state update with
    the parsed predictions, the raw response, and query-cost bookkeeping.
    """
    verbose = config.get("configurable", {}).get("verbose_printing", False)

    # Forward per-run configurable fields (model, temp, ...) into the LLM and
    # request structured FLOPCounts output alongside the raw message.
    configured_llm = llm.with_config(configurable=config.get("configurable", {})).with_structured_output(FLOPCounts, include_raw=True)

    prompt = make_prompt(state['prompt_type'])

    chain = prompt | configured_llm

    if verbose:
        print("---------- BEGIN STEP 1: QUERY FOR FLOP COUNT ----------", flush=True)
        print(f"\tQuerying for FLOP count of kernel: {state['combined_name']}", flush=True)

    # Fill the prompt template with every problem attribute gathered in step 0.
    result = chain.invoke({
        "source_code": state['source_code'],
        "kernel_name": state['kernel_name'],
        "exec_args": state['exec_args'],
        "grid_size": state['grid_size'],
        "block_size": state['block_size'],
        "total_num_threads": state['total_num_threads'],
        "compile_commands": state['compile_commands']
    })

    # NOTE(review): with include_raw=True, result['parsed'] may be None when
    # structured-output parsing fails — the attribute accesses below would
    # then raise AttributeError; confirm upstream guarantees or add a guard.
    parsed_result = result['parsed']

    if verbose:
        result['raw'].pretty_print()
        # check if the sp_flop_count attributes are present and not None
        if parsed_result.sp_flop_count is not None and parsed_result.dp_flop_count is not None:
            print(f"\tGot an LLM response!: \n\tSP_FLOP:[{parsed_result.sp_flop_count}], \n\tDP_FLOP:[{parsed_result.dp_flop_count}]\n", flush=True)

    # Token counts / dollar cost / timing for this query (dict of state keys).
    query_cost = get_query_cost(result['raw'], verbose)

    return query_cost | {'predicted_sp_flop_count': parsed_result.sp_flop_count,
                         'predicted_dp_flop_count': parsed_result.dp_flop_count,
                         'predicted_sp_flop_count_explanation': parsed_result.sp_flop_explanation,
                         'predicted_dp_flop_count_explanation': parsed_result.dp_flop_explanation,
                         'raw_flop_counts': [result['raw']]
                         }
128+
129+
130+
def make_graph(sqlite_db_path: str):
    """Build and compile the two-node baseline query graph.

    sqlite_db_path: path to the sqlite database file used by SqliteSaver to
    persist graph checkpoints (intermediate results) between runs.
    """
    builder = StateGraph(BaselineQueryState, context_schema=Configuration)

    # node 0 loads the problem row; node 1 queries the LLM for FLOP counts
    builder.add_node("get_input_problem_0", get_input_problem)
    builder.add_node("query_for_flop_count_1", query_for_flop_count)

    builder.set_entry_point("get_input_problem_0")
    builder.add_edge("get_input_problem_0", "query_for_flop_count_1")
    builder.add_edge("query_for_flop_count_1", END)

    # check_same_thread=False lets the connection be used from langgraph's
    # worker threads; the checkpointer saves state after each node.
    db_connection = sqlite3.connect(sqlite_db_path, check_same_thread=False)
    return builder.compile(checkpointer=SqliteSaver(db_connection))
148+
149+
150+

llm-querying/configuration.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from pydantic import BaseModel, Field
2+
from typing import Annotated, Literal
3+
4+
# Graph nodes that actually call an LLM; used to scope LLM-only settings.
llm_nodes = ["query_for_flop_count_1"]

# Every node in the graph; settings like verbose printing apply to all.
all_nodes = [*llm_nodes, "get_input_problem_0"]
9+
10+
class Configuration(BaseModel):
    """Runtime-configurable settings for the baseline querying agent.

    Fields tagged with langgraph_nodes=llm_nodes only affect LLM-calling
    nodes; verbose_printing applies to every node in the graph.
    """

    temp: float = Field(
        default=0.2,
        description="The temperature to use for the LLM. Higher values make the output more random, lower values make it more deterministic.",
        json_schema_extra={"langgraph_nodes": llm_nodes},
    )

    top_p: float = Field(
        default=0.1,
        description="The top_p value to use for the LLM. Higher values make the output more random, lower values make it more deterministic. This is used in conjunction with temperature to control the randomness of the output.",
        json_schema_extra={"langgraph_nodes": llm_nodes},
    )

    # FIX: the OpenRouter API lives at openrouter.ai, not openrouter.com
    # (dataset_and_llm.py connects to https://openrouter.ai/api/v1).
    provider_url: str = Field(
        default="https://openrouter.ai/api/v1",
        description="The URL of the provider's API endpoint. This is used to connect to the LLM provider.",
        json_schema_extra={"langgraph_nodes": llm_nodes},
    )

    provider_api_key: str = Field(
        default="",
        description="The API key for the LLM provider. This is used to authenticate requests to the provider's API.",
        json_schema_extra={"langgraph_nodes": llm_nodes},
    )

    api_version: str = Field(
        default="",
        description="(Azure only) The API version to use when connecting to the Azure OpenAI service.",
        json_schema_extra={"langgraph_nodes": llm_nodes},
    )

    model: Annotated[
        Literal[
            "openai/gpt-4.1-nano",                # in $0.1 out $0.4
            "openai/gpt-4.1-mini",                # in $0.4 out $1.6
            "openai/gpt-4o-mini",                 # in $0.15 out $0.6
            "openai/o4-mini-high",                # in $1.1 out $4.4
            "openai/o4-mini",                     # in $1.1 out $4.4
            "openai/o3-mini-high",                # in $1.1 out $4.4
            "openai/o3-mini",                     # in $1.1 out $4.4
            "google/gemini-flash-1.5",            # in $0.075 out $0.3
            "google/gemini-2.0-flash-lite-001",   # in $0.075 out $0.3
            "google/gemini-2.0-flash-001",        # in $0.1 out $0.4
            "google/gemini-2.5-flash",            # in $0.3 out $2.5
            # FIX: a missing comma here used to fuse the next two string
            # literals into one invalid model name
            # "anthropic/claude-3.5-haikugpt-5-mini".
            "anthropic/claude-3.5-haiku",         # in $0.8 out $4.0
            "gpt-5-mini",                         # in $
        ],
        {"__template_metadata__": {"kind": "llm"}},
    ] = Field(
        default="openai/gpt-4.1-mini",
        description="The name of the language model to use for the agent's main interactions. "
        "Should be in the form: provider/model-name.",
        json_schema_extra={"langgraph_nodes": llm_nodes},
    )

    verbose_printing: bool = Field(
        default=False,
        description="If True, the agent will print detailed information about each step of the analysis.",
        json_schema_extra={"langgraph_nodes": all_nodes},
    )

llm-querying/dataset_and_llm.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
"""Load the FLOP-benchmark datasets and build the configurable chat models.

Importing this module has side effects: it reads two dataset CSVs from
$GPU_FLOPBENCH_ROOT/dataset-creation and instantiates the Azure and
OpenRouter chat models, exposing the combined `llm` at module level.
"""
import pandas as pd

from langchain_openai import ChatOpenAI, AzureChatOpenAI
from langchain_core.runnables import ConfigurableField

import os
import csv


# Root of the GPU-FLOPBench checkout; the dataset CSVs live under
# <root>/dataset-creation/.
GPU_FLOPBENCH_ROOT = os.environ.get('GPU_FLOPBENCH_ROOT')
# FIX: fail fast with an actionable message. Previously an unset variable
# surfaced as an opaque TypeError inside os.path.join below.
if not GPU_FLOPBENCH_ROOT:
    raise RuntimeError("GPU_FLOPBENCH_ROOT environment variable is not set; it must point to the repository root.")
print(GPU_FLOPBENCH_ROOT)

# "Hard" split: unbalanced kernel set with compile commands.
hard_dataset_path = os.path.join(GPU_FLOPBENCH_ROOT, 'dataset-creation', 'hard_kernels_to_inference_unbalanced_with_compile_commands.csv')
print('hard_dataset_path', hard_dataset_path)
hard_df_to_query = pd.read_csv(hard_dataset_path, quotechar='"', quoting=csv.QUOTE_NONNUMERIC)

# "Easy" split: balanced kernel set with compile commands.
easy_dataset_path = os.path.join(GPU_FLOPBENCH_ROOT, 'dataset-creation', 'kernels_to_inference_balanced_with_compile_commands.csv')
print('easy_dataset_path', easy_dataset_path)
easy_df_to_query = pd.read_csv(easy_dataset_path, quotechar='"', quoting=csv.QUOTE_NONNUMERIC)

try:
    # for some reason, the AzureChatOpenAI class fails to initialize properly
    # because it seems like it tries to reach out to the node to get metadata or check alive state
    # if the node is not set up for a particular model, we get a 404 error
    # we put this guard here to avoid erroring out when we are not using Azure
    azureModel = AzureChatOpenAI(
        openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        azure_endpoint="https://galor-m8yvytc2-swedencentral.cognitiveservices.azure.com",
        openai_api_version="2025-04-01-preview",
        temperature=1,
        top_p=1,
        model_name="gpt-5-mini",
        timeout=120,
    ).configurable_fields(
        # these ids match the field names declared in configuration.Configuration
        model_name=ConfigurableField(id="model"),
        temperature=ConfigurableField(id="temp"),
        top_p=ConfigurableField(id="top_p"),
        azure_endpoint=ConfigurableField(id="provider_url"),
        openai_api_key=ConfigurableField(id="provider_api_key"),
        openai_api_version=ConfigurableField(id="api_version"),
        request_timeout=ConfigurableField(id="timeout"),
    )
except Exception as e:
    # Best-effort fallback so non-Azure runs can still import this module.
    print(f"Azure model could not be setup correctly! Falling back to OpenAI model in its place.", flush=True)
    print(f"Error: {e}", flush=True)

    azureModel = ChatOpenAI()


openrouterModel = ChatOpenAI(
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    openai_api_base="https://openrouter.ai/api/v1",
    temperature=0.2,
    top_p=0.1,
    model_name="openai/gpt-5-mini",
    timeout=120,
).configurable_fields(
    # NOTE(review): these ids carry an opr_ prefix, while the Azure model and
    # configuration.Configuration use unprefixed ids (model, temp, ...) —
    # confirm callers set opr_* keys when targeting OpenRouter.
    model_name=ConfigurableField(id="opr_model"),
    temperature=ConfigurableField(id="opr_temp"),
    top_p=ConfigurableField(id="opr_top_p"),
    openai_api_base=ConfigurableField(id="opr_provider_url"),
    openai_api_key=ConfigurableField(id="opr_provider_api_key"),
    request_timeout=ConfigurableField(id="opr_timeout"),
)

# `llm` defaults to the OpenRouter model; select Azure at runtime via
# config {"configurable": {"llm": "azure"}}.
llm = openrouterModel.configurable_alternatives(
    ConfigurableField(id="llm"),
    default_key="openai",
    azure=azureModel,
)

llm-querying/doAllBaselineRuns.sh

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
2+
3+
# Baseline LLM-query runs for the FLOP-count benchmark. Each command queries
# one model over a dataset split (easy by default, --hardDataset for hard)
# and tees all output to a per-run log file.

# Earlier runs, kept for reference (already completed):
#python3 ./run_llm_queries.py --skipConfirm --modelName openai/gpt-5-mini --numTrials 3 --verbose 2>&1 | tee -a ./gpt-5-mini-simplePrompt.log
#python3 ./run_llm_queries.py --skipConfirm --modelName openai/gpt-5-mini --useFullPrompt --numTrials 3 --verbose 2>&1 | tee -a ./gpt-5-mini-fullPrompt.log
#python3 ./run_llm_queries.py --hardDataset --useAzure --api_version 2025-04-01-preview --provider_url https://galor-m8yvytc2-swedencentral.cognitiveservices.azure.com --skipConfirm --modelName gpt-5-mini --useFullPrompt --numTrials 3 --top_p 1.0 --temp 1.0 --verbose 2>&1 | tee -a ./gpt-5-mini-fullPrompt-hardDataset.log


#python3 ./run_llm_queries.py --hardDataset --useAzure --api_version 2025-04-01-preview --provider_url https://galor-m8yvytc2-swedencentral.cognitiveservices.azure.com --skipConfirm --modelName gpt-5-mini --numTrials 3 --top_p 1.0 --temp 1.0 --verbose 2>&1 | tee -a ./gpt-5-mini-simplePrompt-hardDataset.log

# Azure deployment endpoints for reference:
# 4o-mini
# https://galor-m8yvytc2-swedencentral.cognitiveservices.azure.com/openai/deployments/gpt-4o-mini/chat/completions?api-version=2025-01-01-preview

# o1-mini -- doesn't support system messages -- can't work on our platform
# https://galor-m8yvytc2-swedencentral.cognitiveservices.azure.com/openai/deployments/o1-mini/chat/completions?api-version=2025-01-01-preview

# o3-mini
# https://galor-m8yvytc2-swedencentral.cognitiveservices.azure.com/openai/deployments/o3-mini/chat/completions?api-version=2025-01-01-preview


# Active runs. Note: the o3-mini runs use top_p=1.0/temp=1.0 because the
# reasoning models do not accept the lower sampling settings.

# 4o-mini
python3 ./run_llm_queries.py --useAzure --api_version 2025-01-01-preview --provider_url https://galor-m8yvytc2-swedencentral.cognitiveservices.azure.com --skipConfirm --modelName gpt-4o-mini --numTrials 3 --top_p 0.5 --temp 0.2 --verbose 2>&1 | tee -a ./gpt-4o-mini-simplePrompt-easyDataset.log
python3 ./run_llm_queries.py --useAzure --api_version 2025-01-01-preview --provider_url https://galor-m8yvytc2-swedencentral.cognitiveservices.azure.com --skipConfirm --modelName gpt-4o-mini --numTrials 3 --top_p 0.5 --temp 0.2 --verbose --hardDataset 2>&1 | tee -a ./gpt-4o-mini-simplePrompt-hardDataset.log

# o3-mini
python3 ./run_llm_queries.py --useAzure --api_version 2025-01-01-preview --provider_url https://galor-m8yvytc2-swedencentral.cognitiveservices.azure.com --skipConfirm --modelName o3-mini --numTrials 3 --top_p 1.0 --temp 1.0 --verbose 2>&1 | tee -a ./o3-mini-simplePrompt-easyDataset.log
python3 ./run_llm_queries.py --useAzure --api_version 2025-01-01-preview --provider_url https://galor-m8yvytc2-swedencentral.cognitiveservices.azure.com --skipConfirm --modelName o3-mini --numTrials 3 --top_p 1.0 --temp 1.0 --verbose --hardDataset 2>&1 | tee -a ./o3-mini-simplePrompt-hardDataset.log

0 commit comments

Comments
 (0)