Elixir:提取大型结构化数据中的元素

Elixir: extract element in a large structured data

我用 HTTPoison 发出了一个 GET 请求,现在正在尝试解析返回的 JSON。为此,我使用了 Poison 库。但是我在从 JSON 对象中获取元素时遇到了一些问题。

这是我的代码:

# Requests a single active establishment named "SAFENERGY" from the Sirene
# API and prints the decoded JSON response.
#
# Relies on `start/0`, `get/1` and `decode!/1` being in scope — presumably
# imported from HTTPoison and Poison; confirm against the enclosing module.
#
# On a 200 response the decoded body is inspected and returned; the other
# branches only print a diagnostic.
def parse_json do
  IO.puts("\nLet's parse JSON file.")

  company = "SAFENERGY"

  url =
    "https://entreprise.data.gouv.fr/api/sirene/v3/etablissements/?page=1&per_page=1&etat_administratif=A&denomination_usuelle=" <>
      company

  start()
  response = get(url)

  case response do
    {:ok, %{status_code: 200, body: payload}} ->
      IO.puts("Success research.")

      payload
      |> decode!()
      |> IO.inspect()

    {:ok, %{status_code: 429}} ->
      IO.puts("Exceeding the maximum call volume (7 requests/s maximum).")

    {:ok, %{status_code: 500}} ->
      IO.puts("Nonfunctional Sirene API's server (maintenance...).")

    {:ok, %{status_code: 404}} ->
      IO.puts("None match between establishment's name and Sirene API.")

    {:error, %{reason: why}} ->
      IO.puts("Failure research.")
      IO.inspect(why)

    _other ->
      IO.puts("Unknown error! Good luck to find it!")
  end
end

我的输出:

%{
  "etablissements" => [
    %{
      "statut_diffusion" => "O",
      "libelle_voie" => "MAS DES PERES",
      "distribution_speciale_2" => nil,
      "geo_ligne" => "G",
      "libelle_voie_2" => nil,
      "unite_legale_id" => 33014385,
      "type_voie_2" => nil,
      "geo_type" => "street",
      "code_postal_2" => nil,
      "libelle_pays_etranger" => nil,
      "indice_repetition_2" => nil,
      "libelle_commune" => "MAUGUIO",
      "siret" => "48944519700036",
      "id" => 70725092,
      "distribution_speciale" => nil,
      "indice_repetition" => nil,
      "siren" => "489445197",
      "complement_adresse" => nil,
      "unite_legale" => %{
        "statut_diffusion" => "O",
        "nic_siege" => "00036",
        "prenom_usuel" => nil,
        "sigle" => nil,
        "denomination" => "SAFENERGY",
        "id" => 33014385,
        "pseudonyme" => nil,
        "nom_usage" => nil,
        "siren" => "489445197",
        "date_dernier_traitement" => "2019-11-13T15:06:19",
        "annee_effectifs" => nil,
        "categorie_entreprise" => "PME",
        "tranche_effectifs" => nil,
        "identifiant_association" => nil,
        "sexe" => nil,
        "prenom_2" => nil,
        "caractere_employeur" => "N",
        "nom" => nil,
        "created_at" => "2020-07-02T02:56:19.773+02:00",
        "economie_sociale_solidaire" => "N",
        "prenom_4" => nil,
        "date_fin" => nil,
        "date_debut" => "2019-07-01",
        "prenom_3" => nil,
        "date_creation" => "2006-04-01",
        "annee_categorie_entreprise" => "2017",
        "denomination_usuelle_2" => nil,
        "denomination_usuelle_3" => nil,
        "denomination_usuelle_1" => nil,
        ...
      },
      "date_dernier_traitement" => "2019-11-13T15:06:19",
      "annee_effectifs" => nil,
      "denomination_usuelle" => "SAFENERGY",
      "code_pays_etranger" => nil,
      "complement_adresse_2" => nil,
      "tranche_effectifs" => nil,
      "enseigne_1" => nil,
      "numero_voie_2" => nil,
      "geo_id" => "34154_b163",
      "activite_principale_registre_metiers" => nil,
      "geo_l5" => nil,
      "caractere_employeur" => "N",
      "geo_l4" => "MAS DES PERES",
      "nic" => "00036",
      "code_postal" => "34130",
      "libelle_cedex_2" => nil,
      "created_at" => "2020-07-02T03:12:45.814+02:00",
      "numero_voie" => "9002",
      "longitude" => "3.967449",
      "type_voie" => nil,
      "date_debut" => "2019-07-01",
      "code_cedex_2" => nil,
      "date_creation" => "2019-07-01",
      "code_commune_2" => nil,
      "libelle_pays_etranger_2" => nil,
      "libelle_commune_etranger" => nil,
      "latitude" => "43.603798",
      "code_cedex" => nil,
      "geo_score" => "0.95",
      ...
    }
  ],
  "meta" => %{
    "page" => 1,
    "per_page" => 1,
    "total_pages" => 1,
    "total_results" => 1
  }
}

正如您在上面看到的(被注释掉的那四行),我正在尝试提取例如 "siren" 这个元素,但都失败了……我是不是完全搞错了方向?

我猜您想遍历所有“etablissements”并检索所有 SIREN 号码。您可以尝试类似的操作:

# Decode the JSON body, take the "etablissements" list, and collect the
# "siren" value of every entry into a list.
# Assumes `body` is the raw JSON string and `decode!/1` is in scope
# (presumably Poison's) — confirm against the calling code.
body
|> decode!()
|> Map.get("etablissements")
|> Enum.map(fn etablissement -> Map.get(etablissement, "siren") end)

假设数据结构如上所示。请注意,Enum.map/2 与 Map 模块无关,它只是对 "etablissements" 列表中的每个条目运行函数 fn etablissement -> Map.get(etablissement, "siren") end

这应该会返回一个包含所有 SIREN 号码的列表。如果可能有重复条目,您可以使用 Enum.uniq/1 去重。

这正是 Access(一般而言)以及 Kernel.get_in/2(具体而言)派上用场的地方:

# Walk the decoded map along the path: "etablissements" list -> every
# element (Access.all()) -> its "unite_legale" map -> "siren".
# Because of Access.all(), the result is a list with one entry per
# establishment; the value below is for the sample response shown above.
body
|> decode!()
|> get_in(["etablissements", Access.all(), "unite_legale", "siren"])
#⇒ ["489445197"]

使用 Access 您可以过滤每一步的结果,获取所有分支等。这里我们使用 Access.all/0 来获取列表的所有元素。