D3.js WordCloud:单词重叠且间距和分布奇怪

D3.js WordCloud: Words overlap and have weird spacing & distribution

我是 D3.js 的新手,我正在尝试使用 Jason Davies 的 D3 词云库制作词云。 我不确定为什么频率较低的词会重叠,而且词的分布是这样的,中间有很多空白。我希望它更简洁、更有条理。这是

我在这里看到了类似的问题,并且尝试了很多前面提到的解决方案,例如:

  1. 改变 svg 和 canvas 尺寸。
  2. 更改字体样式 .font('Helvetica')
  3. 使用 10 个词、30 个词和 50 个词的变体。
  4. 指定了这样的文本访问器函数.text(function(d) { return d.word; })
  5. 探索了 .padding() 中的各个选项
  6. 使用了 .rotate(0)

这是我在 JS Fiddle 上的代码。

对于单词的大小,我在代码中使用 rank 而不是单词出现的 frequency 。这是因为由于我的数据集中的异常值,尺寸调整被打乱了。假设频率最高的是32,频率最低的是1,这两个尺寸按比例映射,差别很大。

所以我选择根据 rank 来调整单词的大小。我已经使用 Javascript 对这些数据进行排序。

这是我的第一个问题,如果有不清楚的地方,请见谅。希望能得到任何反馈、帮助或解决方案!

提前致谢:)

    // Chart geometry: a 600x550 outer box with a 10px margin removed on each side.
    const margin = { top: 10, right: 10, bottom: 10, left: 10 };
    const width = 600 - margin.left - margin.right;
    const height = 550 - margin.top - margin.bottom;

    // Word -> count lookup that feeds the word cloud.
    const dataset = {
        talk: 2,
        customer: 3,
        helpful: 1,
        upgrade: 2,
        excite: 12,
        yesterday: 6,
        feedback: 5,
        staging: 2,
        good: 12,
        work: 28,
        nice: 9,
        ship: 4,
        cool: 5,
        planner: 2,
        homepage: 2,
        awesome: 2,
        call: 3,
        week: 20,
        monthly: 2,
        focus: 6,
        marketing: 6,
        website: 7,
        annoy: 2,
        launch: 5,
        today: 7,
        nashville: 5,
        people: 8,
        golf: 2,
        afternoon: 6,
        snow: 6,
        tomorrow: 8,
        ph: 5,
        email: 4,
        exist: 2,
        user: 13,
        time: 14,
        morning: 13,
        early: 4,
        add: 4,
        product: 8,
        day: 13,
        block: 4,
        weekend: 7,
        bitcoin: 1,
        trillion: 1,
        move: 5,
        peep: 2,
        integration: 6,
        drive: 5,
        help: 4,
    };

    // Create the <svg> inside #my_dataviz at the full outer size and keep a
    // handle on an inner <g> that is offset by the left/top margins.
    const svg = d3.select("#my_dataviz")
        .append("svg")
        .attr("width", width + margin.left + margin.right)
        .attr("height", height + margin.top + margin.bottom)
        .append("g")
        .attr("transform", `translate(${margin.left},${margin.top})`);

    // Turn the {word: count} object into [word, count] pairs, ordered from the
    // lowest count to the highest.
    const word_list = Object.entries(dataset).sort((a, b) => a[1] - b[1]);

    // Smallest and largest counts; both feed the colour mapping.
    const minValue = d3.min(word_list, (d) => d[1]);
    const maxValue = d3.max(word_list, (d) => d[1]);

    // Sequential purple scale whose domain runs from the smallest count to
    // three below the largest count.
    const color = d3.scaleSequential()
        .domain([minValue, maxValue - 3])
        .interpolator(d3.interpolatePurples); // built-in purple colour scheme

    // Compute the number that is handed to the colour scale for one word:
    // `from_percentage` percent of maxValue plus (to - from) percent of `value`.
    // Despite the name, the result is a plain number, not an RGB colour.
    function get_rgb(from_percentage, to_percentage, value) {
        const base = maxValue * from_percentage / 100;
        const span = value * (to_percentage - from_percentage) / 100;
        return base + span;
    }

    // Dense ranking over the ascending list: words with equal counts share a
    // rank, and the rank goes up by one each time the count changes.
    let prev_frequency = 0;
    let rank = 0;

    for (const entry of word_list) {
        const frequency = entry[1];

        if (frequency !== prev_frequency) {
            rank += 1;
        }
        entry.rank = rank;
        prev_frequency = frequency;

        // 50 and 100 are the "from" and "to" percentages passed to get_rgb.
        entry.color = color(get_rgb(50, 100, frequency));
    }

    // Builds the d3-cloud layout, which computes a position for every word.
    // Anything that differs from one word to the next (text, font size) is
    // supplied here through accessor functions.
    // Each entry of word_list is a [word, count] pair carrying the extra `rank`
    // and `color` properties attached earlier in the script.
    var layout = d3.layout.cloud()
        // Layout area is 150px narrower and 200px shorter than the inner chart area.
        .size([width - 150, height - 200])
        .words(word_list)
        .padding(7) //space between words
        .rotate(0)
        .font('Helvetica')
        .fontWeight("bold")
        // NOTE: this is the size used for placement. draw() renders the text
        // with a different size (5 + d.rank * 5 / 6) and without a bold weight.
        .fontSize(function(d) { return d.rank; }) // font size of words
        .text(function(d) { return d[0]; })
        // draw is invoked with the placed words when the layout finishes.
        .on("end", draw);
    layout.start();

    // Renders the words produced by `layout` into the svg and wires up a hover
    // tooltip showing each word's count.
    // Features that are the same for every word can be set here.
    //
    // words: the array passed to the layout's "end" handler. Each item is read
    //        here as d[0] (word), d[1] (count), d.rank, d.color, d.x, d.y and
    //        d.rotate.
    function draw(words) {

        // Adds the tooltip element; its appearance comes from the #tooltip CSS rule.
        // A new div is appended on every call to draw().
        d3.select('#my_dataviz')
            .append('div')
            .attr('id', 'tooltip');
        // .attr('style', 'position: absolute; opacity: 0;');

        svg
            .append("g")
            // Shift the group to the middle of the layout area.
            // NOTE(review): presumably word x/y are relative to the layout centre - confirm against d3-cloud docs.
            .attr("transform", "translate(" + layout.size()[0] / 2 + "," + layout.size()[1] / 2 + ")")
            .selectAll("text")
            .data(words)
            .enter().append("text")
            // Unitless number, and not the same formula as the layout's
            // .fontSize accessor (which returns d.rank).
            .style("font-size", function(d) { return 5 + d.rank * 5 / 6; })
            // .style("fill", "#69b3a2")
            .attr("text-anchor", "middle")
            // No font-weight is set here, although the layout is configured with "bold".
            .style("font-family", "Helvetica")
            // .attr('font-family', 'Impact')
            .attr("fill", function(d) { return d.color; })
            .attr("transform", function(d) {
                return "translate(" + [d.x, d.y] + ")rotate(" + d.rotate + ")";
            })
            .text(function(d) { return d[0]; })
            // Show the tooltip with the word's count at the pointer position.
            .on('mouseover', function(event, d) {
                d3.select('#tooltip')
                    .style('opacity', 1)
                    .text(`${d[1]} times`)
                    .style('left', (event.pageX) + 'px')
                    .style('top', (event.pageY) + 'px')
            })
            // Hide the tooltip again when the pointer leaves the word.
            .on('mouseout', function(event, d) {
                d3.select('#tooltip')
                    .style('opacity', 0)
            });
    }
/* Hover tooltip: invisible by default; the script sets opacity, left and top inline. */
#tooltip {
    /* placement and interaction */
    position: absolute;
    pointer-events: none;
    opacity: 0;

    /* box */
    padding: 3px;
    border-radius: 2px;
    background-color: rgba(136, 136, 136, 0.884);

    /* text */
    color: rgba(255, 255, 255, 0.842);
    font-family: Arial, Helvetica, sans-serif;
    font-size: 5pt;
    letter-spacing: 0.25px;
}
<!DOCTYPE html>
<meta charset="utf-8">

<!-- Load d3.js -->
<script src="https://d3js.org/d3.v6.js"></script>
<link rel="stylesheet" type="text/css" href="style.css">

<!-- Load d3-cloud -->
<script src="https://cdn.jsdelivr.net/gh/holtzy/D3-graph-gallery@master/LIB/d3.layout.cloud.js"></script>

<!-- Colour schemes. Explicit https: a protocol-relative URL does not resolve when the page is opened from file:// -->
<script src="https://d3js.org/d3-scale-chromatic.v0.3.min.js"></script>

<body>
    <!-- Container that the script selects with d3.select("#my_dataviz") -->
    <div id="my_dataviz"></div>
    <!-- Placed after the container so the element exists when the script runs -->
    <script type="text/javascript" src="wordcloud.js"></script>
</body>

布局使用的字体大小不一致:

 .fontSize(function(d) { return d.rank; }) // font size of words

以及您绘制的字体大小:

 .style("font-size", function(d) { return 5 + d.rank * 5 / 6; })

你想让布局考虑你正在绘制的文本的大小,所以你应该更新布局生成器以使用正确的字体大小:

 .fontSize(function(d) { return 5 + d.rank * 5 / 6; })

// Chart geometry: a 600x550 outer box with a 10px margin removed on each side.
    const margin = { top: 10, right: 10, bottom: 10, left: 10 };
    const width = 600 - margin.left - margin.right;
    const height = 550 - margin.top - margin.bottom;

    // Word -> count lookup that feeds the word cloud.
    const dataset = {
        talk: 2,
        customer: 3,
        helpful: 1,
        upgrade: 2,
        excite: 12,
        yesterday: 6,
        feedback: 5,
        staging: 2,
        good: 12,
        work: 28,
        nice: 9,
        ship: 4,
        cool: 5,
        planner: 2,
        homepage: 2,
        awesome: 2,
        call: 3,
        week: 20,
        monthly: 2,
        focus: 6,
        marketing: 6,
        website: 7,
        annoy: 2,
        launch: 5,
        today: 7,
        nashville: 5,
        people: 8,
        golf: 2,
        afternoon: 6,
        snow: 6,
        tomorrow: 8,
        ph: 5,
        email: 4,
        exist: 2,
        user: 13,
        time: 14,
        morning: 13,
        early: 4,
        add: 4,
        product: 8,
        day: 13,
        block: 4,
        weekend: 7,
        bitcoin: 1,
        trillion: 1,
        move: 5,
        peep: 2,
        integration: 6,
        drive: 5,
        help: 4,
    };

    // Create the <svg> inside #my_dataviz at the full outer size and keep a
    // handle on an inner <g> that is offset by the left/top margins.
    const svg = d3.select("#my_dataviz")
        .append("svg")
        .attr("width", width + margin.left + margin.right)
        .attr("height", height + margin.top + margin.bottom)
        .append("g")
        .attr("transform", `translate(${margin.left},${margin.top})`);

    // Turn the {word: count} object into [word, count] pairs, ordered from the
    // lowest count to the highest.
    const word_list = Object.entries(dataset).sort((a, b) => a[1] - b[1]);

    // Smallest and largest counts; both feed the colour mapping.
    const minValue = d3.min(word_list, (d) => d[1]);
    const maxValue = d3.max(word_list, (d) => d[1]);

    // Sequential purple scale whose domain runs from the smallest count to
    // three below the largest count.
    const color = d3.scaleSequential()
        .domain([minValue, maxValue - 3])
        .interpolator(d3.interpolatePurples); // built-in purple colour scheme

    // Compute the number that is handed to the colour scale for one word:
    // `from_percentage` percent of maxValue plus (to - from) percent of `value`.
    // Despite the name, the result is a plain number, not an RGB colour.
    function get_rgb(from_percentage, to_percentage, value) {
        const base = maxValue * from_percentage / 100;
        const span = value * (to_percentage - from_percentage) / 100;
        return base + span;
    }

    // Dense ranking over the ascending list: words with equal counts share a
    // rank, and the rank goes up by one each time the count changes.
    let prev_frequency = 0;
    let rank = 0;

    for (const entry of word_list) {
        const frequency = entry[1];

        if (frequency !== prev_frequency) {
            rank += 1;
        }
        entry.rank = rank;
        prev_frequency = frequency;

        // 50 and 100 are the "from" and "to" percentages passed to get_rgb.
        entry.color = color(get_rgb(50, 100, frequency));
    }

    // Set up the d3-cloud layout that assigns a position to every word.
    // Per-word settings (text, font size) are supplied as accessor functions;
    // each entry of word_list is a [word, count] pair that also carries `rank`.
    const layout = d3.layout.cloud()
        .words(word_list)
        .size([width, height])
        .text((d) => d[0])
        .font('Helvetica')
        .fontWeight("bold")
        .fontSize((d) => 15 + d.rank * 5 / 3)
        .rotate(0)
        .padding(2); // space between words

    // Hand the placed words to draw() once the layout has finished, then run it.
    layout.on("end", draw);
    layout.start();

    // Renders the words placed by `layout` into the svg and wires up a hover
    // tooltip showing each word's count.
    //
    // words: the array passed to the layout's "end" handler. Besides the
    //        original [word, count] entries and their `color`, each item
    //        carries the values d3-cloud used while placing it: text, font,
    //        weight, size, rotate, x and y.
    //
    // The text is styled from those layout-provided values (d.size, d.font,
    // d.weight) rather than from a second copy of the formulas, so the glyphs
    // that are drawn have exactly the metrics the layout measured. Rendering
    // with a different size or weight is what makes words overlap or leaves
    // uneven gaps.
    function draw(words) {
        // Create the tooltip element only once, so that calling draw() again
        // does not produce several elements with the same id.
        if (d3.select('#tooltip').empty()) {
            d3.select('#my_dataviz')
                .append('div')
                .attr('id', 'tooltip');
        }

        // Word positions are relative to the centre of the layout area.
        var centre = layout.size().map(function(extent) { return extent / 2; });

        svg
            .append("g")
            .attr("transform", "translate(" + centre[0] + "," + centre[1] + ")")
            .selectAll("text")
            .data(words)
            .enter().append("text")
            // Same size, family and weight that the layout used for placement.
            .style("font-size", function(d) { return d.size + "px"; })
            .style("font-family", function(d) { return d.font; })
            .style("font-weight", function(d) { return d.weight; })
            .attr("text-anchor", "middle")
            .attr("fill", function(d) { return d.color; })
            .attr("transform", function(d) {
                return "translate(" + [d.x, d.y] + ")rotate(" + d.rotate + ")";
            })
            .text(function(d) { return d.text; })
            // Show the tooltip with the word's count at the pointer position.
            .on('mouseover', function(event, d) {
                d3.select('#tooltip')
                    .style('opacity', 1)
                    .text(`${d[1]} times`)
                    .style('left', (event.pageX) + 'px')
                    .style('top', (event.pageY) + 'px');
            })
            // Hide the tooltip again when the pointer leaves the word.
            .on('mouseout', function() {
                d3.select('#tooltip')
                    .style('opacity', 0);
            });
    }
/* Hover tooltip: invisible by default; the script sets opacity, left and top inline. */
#tooltip {
    /* placement and interaction */
    position: absolute;
    pointer-events: none;
    opacity: 0;

    /* box */
    padding: 3px;
    border-radius: 2px;
    background-color: rgba(136, 136, 136, 0.884);

    /* text */
    color: rgba(255, 255, 255, 0.842);
    font-family: Arial, Helvetica, sans-serif;
    font-size: 5pt;
    letter-spacing: 0.25px;
}
<!DOCTYPE html>
<meta charset="utf-8">

<!-- Load d3.js -->
<script src="https://d3js.org/d3.v6.js"></script>
<link rel="stylesheet" type="text/css" href="style.css">

<!-- Load d3-cloud -->
<script src="https://cdn.jsdelivr.net/gh/holtzy/D3-graph-gallery@master/LIB/d3.layout.cloud.js"></script>

<!-- Colour schemes. Explicit https: a protocol-relative URL does not resolve when the page is opened from file:// -->
<script src="https://d3js.org/d3-scale-chromatic.v0.3.min.js"></script>

<body>
    <!-- Container that the script selects with d3.select("#my_dataviz") -->
    <div id="my_dataviz"></div>
    <!-- Placed after the container so the element exists when the script runs -->
    <script type="text/javascript" src="wordcloud.js"></script>
</body>

(在 enter 选择集中渲染文本并设置字体大小时,顺便指定单位也没有坏处——这是我在上述更改之外另做的一处改动。)

至于 g 元素的大小:它取决于其中的内容。您可以使用更大的字体大小,并增大提供给词云布局生成器的尺寸(这两点我在上面的代码中都已经做了)。